• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "lowmemorykiller"
18 
19 #include <dirent.h>
20 #include <errno.h>
21 #include <inttypes.h>
22 #include <pwd.h>
23 #include <sched.h>
24 #include <signal.h>
25 #include <stdbool.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/cdefs.h>
29 #include <sys/epoll.h>
30 #include <sys/eventfd.h>
31 #include <sys/mman.h>
32 #include <sys/resource.h>
33 #include <sys/socket.h>
34 #include <sys/sysinfo.h>
35 #include <sys/time.h>
36 #include <sys/types.h>
37 #include <time.h>
38 #include <unistd.h>
39 
40 #include <cutils/properties.h>
41 #include <cutils/sched_policy.h>
42 #include <cutils/sockets.h>
43 #include <lmkd.h>
44 #include <log/log.h>
45 #include <log/log_event_list.h>
46 #include <log/log_time.h>
47 #include <psi/psi.h>
48 #include <system/thread_defs.h>
49 
50 #ifdef LMKD_LOG_STATS
51 #include "statslog.h"
52 #endif
53 
54 /*
55  * Define LMKD_TRACE_KILLS to record lmkd kills in kernel traces
56  * to profile and correlate with OOM kills
57  */
58 #ifdef LMKD_TRACE_KILLS
59 
60 #define ATRACE_TAG ATRACE_TAG_ALWAYS
61 #include <cutils/trace.h>
62 
63 #define TRACE_KILL_START(pid) ATRACE_INT(__FUNCTION__, pid);
64 #define TRACE_KILL_END()      ATRACE_INT(__FUNCTION__, 0);
65 
66 #else /* LMKD_TRACE_KILLS */
67 
68 #define TRACE_KILL_START(pid) ((void)(pid))
69 #define TRACE_KILL_END() ((void)0)
70 
71 #endif /* LMKD_TRACE_KILLS */
72 
73 #ifndef __unused
74 #define __unused __attribute__((__unused__))
75 #endif
76 
77 #define MEMCG_SYSFS_PATH "/dev/memcg/"
78 #define MEMCG_MEMORY_USAGE "/dev/memcg/memory.usage_in_bytes"
79 #define MEMCG_MEMORYSW_USAGE "/dev/memcg/memory.memsw.usage_in_bytes"
80 #define ZONEINFO_PATH "/proc/zoneinfo"
81 #define MEMINFO_PATH "/proc/meminfo"
82 #define LINE_MAX 128
83 
84 /* Android Logger event logtags (see event.logtags) */
85 #define MEMINFO_LOG_TAG 10195355
86 
87 /* gid containing AID_SYSTEM required */
88 #define INKERNEL_MINFREE_PATH "/sys/module/lowmemorykiller/parameters/minfree"
89 #define INKERNEL_ADJ_PATH "/sys/module/lowmemorykiller/parameters/adj"
90 
91 #define ARRAY_SIZE(x)   (sizeof(x) / sizeof(*(x)))
92 #define EIGHT_MEGA (1 << 23)
93 
94 #define TARGET_UPDATE_MIN_INTERVAL_MS 1000
95 
96 #define NS_PER_MS (NS_PER_SEC / MS_PER_SEC)
97 #define US_PER_MS (US_PER_SEC / MS_PER_SEC)
98 
99 /* Defined as ProcessList.SYSTEM_ADJ in ProcessList.java */
100 #define SYSTEM_ADJ (-900)
101 
102 #define STRINGIFY(x) STRINGIFY_INTERNAL(x)
103 #define STRINGIFY_INTERNAL(x) #x
104 
105 /*
106  * PSI monitor tracking window size.
107  * PSI monitor generates events at most once per window,
108  * therefore we poll memory state for the duration of
109  * PSI_WINDOW_SIZE_MS after the event happens.
110  */
111 #define PSI_WINDOW_SIZE_MS 1000
112 /* Polling period after initial PSI signal */
113 #define PSI_POLL_PERIOD_MS 10
114 /* Poll for the duration of one window after initial PSI signal */
115 #define PSI_POLL_COUNT (PSI_WINDOW_SIZE_MS / PSI_POLL_PERIOD_MS)
116 
117 #define min(a, b) (((a) < (b)) ? (a) : (b))
118 
119 #define FAIL_REPORT_RLIMIT_MS 1000
120 
121 /* default to old in-kernel interface if no memory pressure events */
122 static bool use_inkernel_interface = true;
123 static bool has_inkernel_module;
124 
/*
 * Memory pressure levels.
 * VMPRESS_LEVEL_COUNT is not a level: it must stay last because it sizes
 * the per-level arrays declared in this file.
 */
enum vmpressure_level {
    VMPRESS_LEVEL_LOW = 0,
    VMPRESS_LEVEL_MEDIUM,
    VMPRESS_LEVEL_CRITICAL,
    VMPRESS_LEVEL_COUNT
};
132 
/* Printable names of the pressure levels, in enum vmpressure_level order. */
static const char *level_name[] = {
    "low",
    "medium",
    "critical"
};
138 
/*
 * Free page counts tracked for the low pressure level.
 * Both members start at -1; the code that updates them is outside this view.
 */
struct {
    int64_t min_nr_free_pages; /* recorded but not used yet */
    int64_t max_nr_free_pages;
} low_pressure_mem = { -1, -1 };
143 
144 struct psi_threshold {
145     enum psi_stall_type stall_type;
146     int threshold_ms;
147 };
148 
149 static int level_oomadj[VMPRESS_LEVEL_COUNT];
150 static int mpevfd[VMPRESS_LEVEL_COUNT] = { -1, -1, -1 };
151 static bool debug_process_killing;
152 static bool enable_pressure_upgrade;
153 static int64_t upgrade_pressure;
154 static int64_t downgrade_pressure;
155 static bool low_ram_device;
156 static bool kill_heaviest_task;
157 static unsigned long kill_timeout_ms;
158 static bool use_minfree_levels;
159 static bool per_app_memcg;
160 static int swap_free_low_percentage;
161 static bool use_psi_monitors = false;
162 static struct psi_threshold psi_thresholds[VMPRESS_LEVEL_COUNT] = {
163     { PSI_SOME, 70 },    /* 70ms out of 1sec for partial stall */
164     { PSI_SOME, 100 },   /* 100ms out of 1sec for partial stall */
165     { PSI_FULL, 70 },    /* 70ms out of 1sec for complete stall */
166 };
167 
168 static android_log_context ctx;
169 
/*
 * Data required to handle events: a callback plus the integer argument
 * that is handed back to it together with the event mask.
 */
struct event_handler_info {
    int data;
    void (*handler)(int data, uint32_t events);
};
175 
176 /* data required to handle socket events */
177 struct sock_event_handler_info {
178     int sock;
179     struct event_handler_info handler_info;
180 };
181 
182 /* max supported number of data connections */
183 #define MAX_DATA_CONN 2
184 
185 /* socket event handler data */
186 static struct sock_event_handler_info ctrl_sock;
187 static struct sock_event_handler_info data_sock[MAX_DATA_CONN];
188 
189 /* vmpressure event handler data */
190 static struct event_handler_info vmpressure_hinfo[VMPRESS_LEVEL_COUNT];
191 
192 /* 3 memory pressure levels, 1 ctrl listen socket, 2 ctrl data socket */
193 #define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT)
194 static int epollfd;
195 static int maxevents;
196 
197 /* OOM score values used by both kernel and framework */
198 #define OOM_SCORE_ADJ_MIN       (-1000)
199 #define OOM_SCORE_ADJ_MAX       1000
200 
201 static int lowmem_adj[MAX_TARGETS];
202 static int lowmem_minfree[MAX_TARGETS];
203 static int lowmem_targets_size;
204 
/*
 * Fields to parse in /proc/zoneinfo.
 *
 * The enumerator order, zoneinfo_field_names[] and the leading members of
 * union zoneinfo's field struct must be kept in the same order, so that
 * arr[N] aliases the named member for field N.
 */
enum zoneinfo_field {
    ZI_NR_FREE_PAGES = 0,
    ZI_NR_FILE_PAGES,
    ZI_NR_SHMEM,
    ZI_NR_UNEVICTABLE,
    ZI_WORKINGSET_REFAULT,
    ZI_HIGH,
    ZI_FIELD_COUNT
};

/* Field labels, indexed by enum zoneinfo_field. */
static const char* const zoneinfo_field_names[ZI_FIELD_COUNT] = {
    "nr_free_pages",
    "nr_file_pages",
    "nr_shmem",
    "nr_unevictable",
    "workingset_refault",
    "high",
};

/*
 * zoneinfo values, accessible either by name (field) or by
 * enum zoneinfo_field index (arr).
 */
union zoneinfo {
    struct {
        int64_t nr_free_pages;
        int64_t nr_file_pages;
        int64_t nr_shmem;
        int64_t nr_unevictable;
        int64_t workingset_refault;
        int64_t high;
        /* fields below are calculated rather than read from the file */
        int64_t totalreserve_pages;
    } field;
    int64_t arr[ZI_FIELD_COUNT];
};
238 
/*
 * Fields to parse in /proc/meminfo.
 *
 * The enumerator order, meminfo_field_names[] and the leading members of
 * union meminfo's field struct must be kept in the same order, so that
 * arr[N] aliases the named member for field N.
 */
enum meminfo_field {
    MI_NR_FREE_PAGES = 0,
    MI_CACHED,
    MI_SWAP_CACHED,
    MI_BUFFERS,
    MI_SHMEM,
    MI_UNEVICTABLE,
    MI_TOTAL_SWAP,
    MI_FREE_SWAP,
    MI_ACTIVE_ANON,
    MI_INACTIVE_ANON,
    MI_ACTIVE_FILE,
    MI_INACTIVE_FILE,
    MI_SRECLAIMABLE,
    MI_SUNRECLAIM,
    MI_KERNEL_STACK,
    MI_PAGE_TABLES,
    MI_ION_HELP,
    MI_ION_HELP_POOL,
    MI_CMA_FREE,
    MI_FIELD_COUNT
};

/* Field labels (including the trailing ':'), indexed by enum meminfo_field. */
static const char* const meminfo_field_names[MI_FIELD_COUNT] = {
    "MemFree:",
    "Cached:",
    "SwapCached:",
    "Buffers:",
    "Shmem:",
    "Unevictable:",
    "SwapTotal:",
    "SwapFree:",
    "Active(anon):",
    "Inactive(anon):",
    "Active(file):",
    "Inactive(file):",
    "SReclaimable:",
    "SUnreclaim:",
    "KernelStack:",
    "PageTables:",
    "ION_heap:",
    "ION_heap_pool:",
    "CmaFree:",
};

/*
 * meminfo values, accessible either by name (field) or by
 * enum meminfo_field index (arr).
 */
union meminfo {
    struct {
        int64_t nr_free_pages;
        int64_t cached;
        int64_t swap_cached;
        int64_t buffers;
        int64_t shmem;
        int64_t unevictable;
        int64_t total_swap;
        int64_t free_swap;
        int64_t active_anon;
        int64_t inactive_anon;
        int64_t active_file;
        int64_t inactive_file;
        int64_t sreclaimable;
        int64_t sunreclaimable;
        int64_t kernel_stack;
        int64_t page_tables;
        int64_t ion_heap;
        int64_t ion_heap_pool;
        int64_t cma_free;
        /* fields below are calculated rather than read from the file */
        int64_t nr_file_pages;
    } field;
    int64_t arr[MI_FIELD_COUNT];
};
311 
/* Outcome of matching one parsed token against a table of field names. */
enum field_match_result {
    NO_MATCH,      /* the token is not one of the known field names */
    PARSE_FAIL,    /* the name matched but its value could not be parsed */
    PARSE_SUCCESS  /* the name matched and the value was parsed */
};
317 
/*
 * Node of a circular doubly linked list. A list head whose next and prev
 * point at itself represents an empty list.
 */
struct adjslot_list {
    struct adjslot_list *next;
    struct adjslot_list *prev;
};
322 
323 struct proc {
324     struct adjslot_list asl;
325     int pid;
326     uid_t uid;
327     int oomadj;
328     struct proc *pidhash_next;
329 };
330 
/*
 * State for a file that is read repeatedly through reread_file().
 * fd is -1 while the file is not open.
 */
struct reread_data {
    const char* const filename;
    int fd;
};
335 
336 #ifdef LMKD_LOG_STATS
337 static bool enable_stats_log;
338 static android_log_context log_ctx;
339 #endif
340 
341 #define PIDHASH_SZ 1024
342 static struct proc *pidhash[PIDHASH_SZ];
343 #define pid_hashfn(x) ((((x) >> 8) ^ (x)) & (PIDHASH_SZ - 1))
344 
345 #define ADJTOSLOT(adj) ((adj) + -OOM_SCORE_ADJ_MIN)
346 #define ADJTOSLOT_COUNT (ADJTOSLOT(OOM_SCORE_ADJ_MAX) + 1)
347 static struct adjslot_list procadjslot_list[ADJTOSLOT_COUNT];
348 
349 #define MAX_DISTINCT_OOM_ADJ 32
350 #define KILLCNT_INVALID_IDX 0xFF
351 /*
352  * Because killcnt array is sparse a two-level indirection is used
353  * to keep the size small. killcnt_idx stores index of the element in
354  * killcnt array. Index KILLCNT_INVALID_IDX indicates an unused slot.
355  */
356 static uint8_t killcnt_idx[ADJTOSLOT_COUNT];
357 static uint16_t killcnt[MAX_DISTINCT_OOM_ADJ];
358 static int killcnt_free_idx = 0;
359 static uint32_t killcnt_total = 0;
360 
361 /* PAGE_SIZE / 1024 */
362 static long page_k;
363 
/*
 * Parse a base-10 signed integer from the beginning of str.
 *
 * strtoll() semantics apply: leading whitespace is skipped and any text
 * following the digits (for example " kB") is ignored.
 *
 * Returns true and stores the value in *ret on success. Returns false and
 * leaves *ret untouched when no digits were consumed or when the value is
 * out of range (strtoll() reports ERANGE and would otherwise hand back a
 * silently clamped value).
 */
static bool parse_int64(const char* str, int64_t* ret) {
    char* endptr;
    long long val;

    errno = 0;
    val = strtoll(str, &endptr, 10);
    if (str == endptr || errno == ERANGE) {
        return false;
    }
    /* only relevant on platforms where long long is wider than 64 bits */
    if (val > INT64_MAX || val < INT64_MIN) {
        return false;
    }
    *ret = (int64_t)val;
    return true;
}
373 
match_field(const char * cp,const char * ap,const char * const field_names[],int field_count,int64_t * field,int * field_idx)374 static enum field_match_result match_field(const char* cp, const char* ap,
375                                    const char* const field_names[],
376                                    int field_count, int64_t* field,
377                                    int *field_idx) {
378     int64_t val;
379     int i;
380 
381     for (i = 0; i < field_count; i++) {
382         if (!strcmp(cp, field_names[i])) {
383             *field_idx = i;
384             return parse_int64(ap, field) ? PARSE_SUCCESS : PARSE_FAIL;
385         }
386     }
387     return NO_MATCH;
388 }
389 
/*
 * Read file content from the beginning up to max_len bytes or EOF
 * whichever happens first.
 *
 * pread() is used with an explicit offset, so the file position of fd is
 * neither consulted nor modified. Reads interrupted by a signal are retried.
 *
 * Returns the number of bytes stored in buf, or -1 if a read fails (data
 * read before the failure is not reported). No terminating NUL is written.
 */
static ssize_t read_all(int fd, char *buf, size_t max_len)
{
    ssize_t total = 0;
    off_t offset = 0;

    while (max_len > 0) {
        ssize_t r = TEMP_FAILURE_RETRY(pread(fd, buf, max_len, offset));
        if (r == 0) {
            /* EOF */
            break;
        }
        if (r == -1) {
            return -1;
        }
        total += r;
        buf += r;
        offset += r;
        max_len -= r;
    }

    return total;
}
415 
416 /*
417  * Read a new or already opened file from the beginning.
418  * If the file has not been opened yet data->fd should be set to -1.
419  * To be used with files which are read often and possibly during high
420  * memory pressure to minimize file opening which by itself requires kernel
421  * memory allocation and might result in a stall on memory stressed system.
422  */
reread_file(struct reread_data * data,char * buf,size_t buf_size)423 static int reread_file(struct reread_data *data, char *buf, size_t buf_size) {
424     ssize_t size;
425 
426     if (data->fd == -1) {
427         data->fd = open(data->filename, O_RDONLY | O_CLOEXEC);
428         if (data->fd == -1) {
429             ALOGE("%s open: %s", data->filename, strerror(errno));
430             return -1;
431         }
432     }
433 
434     size = read_all(data->fd, buf, buf_size - 1);
435     if (size < 0) {
436         ALOGE("%s read: %s", data->filename, strerror(errno));
437         close(data->fd);
438         data->fd = -1;
439         return -1;
440     }
441     ALOG_ASSERT((size_t)size < buf_size - 1, "%s too large", data->filename);
442     buf[size] = 0;
443 
444     return 0;
445 }
446 
pid_lookup(int pid)447 static struct proc *pid_lookup(int pid) {
448     struct proc *procp;
449 
450     for (procp = pidhash[pid_hashfn(pid)]; procp && procp->pid != pid;
451          procp = procp->pidhash_next)
452             ;
453 
454     return procp;
455 }
456 
adjslot_insert(struct adjslot_list * head,struct adjslot_list * new)457 static void adjslot_insert(struct adjslot_list *head, struct adjslot_list *new)
458 {
459     struct adjslot_list *next = head->next;
460     new->prev = head;
461     new->next = next;
462     next->prev = new;
463     head->next = new;
464 }
465 
adjslot_remove(struct adjslot_list * old)466 static void adjslot_remove(struct adjslot_list *old)
467 {
468     struct adjslot_list *prev = old->prev;
469     struct adjslot_list *next = old->next;
470     next->prev = prev;
471     prev->next = next;
472 }
473 
adjslot_tail(struct adjslot_list * head)474 static struct adjslot_list *adjslot_tail(struct adjslot_list *head) {
475     struct adjslot_list *asl = head->prev;
476 
477     return asl == head ? NULL : asl;
478 }
479 
proc_slot(struct proc * procp)480 static void proc_slot(struct proc *procp) {
481     int adjslot = ADJTOSLOT(procp->oomadj);
482 
483     adjslot_insert(&procadjslot_list[adjslot], &procp->asl);
484 }
485 
proc_unslot(struct proc * procp)486 static void proc_unslot(struct proc *procp) {
487     adjslot_remove(&procp->asl);
488 }
489 
proc_insert(struct proc * procp)490 static void proc_insert(struct proc *procp) {
491     int hval = pid_hashfn(procp->pid);
492 
493     procp->pidhash_next = pidhash[hval];
494     pidhash[hval] = procp;
495     proc_slot(procp);
496 }
497 
pid_remove(int pid)498 static int pid_remove(int pid) {
499     int hval = pid_hashfn(pid);
500     struct proc *procp;
501     struct proc *prevp;
502 
503     for (procp = pidhash[hval], prevp = NULL; procp && procp->pid != pid;
504          procp = procp->pidhash_next)
505             prevp = procp;
506 
507     if (!procp)
508         return -1;
509 
510     if (!prevp)
511         pidhash[hval] = procp->pidhash_next;
512     else
513         prevp->pidhash_next = procp->pidhash_next;
514 
515     proc_unslot(procp);
516     free(procp);
517     return 0;
518 }
519 
520 /*
521  * Write a string to a file.
522  * Returns false if the file does not exist.
523  */
writefilestring(const char * path,const char * s,bool err_if_missing)524 static bool writefilestring(const char *path, const char *s,
525                             bool err_if_missing) {
526     int fd = open(path, O_WRONLY | O_CLOEXEC);
527     ssize_t len = strlen(s);
528     ssize_t ret;
529 
530     if (fd < 0) {
531         if (err_if_missing) {
532             ALOGE("Error opening %s; errno=%d", path, errno);
533         }
534         return false;
535     }
536 
537     ret = TEMP_FAILURE_RETRY(write(fd, s, len));
538     if (ret < 0) {
539         ALOGE("Error writing %s; errno=%d", path, errno);
540     } else if (ret < len) {
541         ALOGE("Short write on %s; length=%zd", path, ret);
542     }
543 
544     close(fd);
545     return true;
546 }
547 
get_time_diff_ms(struct timespec * from,struct timespec * to)548 static inline long get_time_diff_ms(struct timespec *from,
549                                     struct timespec *to) {
550     return (to->tv_sec - from->tv_sec) * (long)MS_PER_SEC +
551            (to->tv_nsec - from->tv_nsec) / (long)NS_PER_MS;
552 }
553 
/*
 * Handle an LMK_PROCPRIO request: set the oom_score_adj of a process and
 * register it (or update its registration) with lmkd.
 *
 * Steps:
 *  1. reject oomadj values outside [OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX];
 *  2. write oomadj to /proc/<pid>/oom_score_adj; if that file cannot be
 *     opened the process is assumed dead and nothing is recorded;
 *  3. stop here when the in-kernel lowmemorykiller is in use;
 *  4. with per-app memcgs, derive a soft limit from oomadj and write it to
 *     the process's memcg;
 *  5. insert or re-slot the process record.
 */
static void cmd_procprio(LMKD_CTRL_PACKET packet) {
    struct proc *procp;
    char path[80];
    char val[20];
    int soft_limit_mult;
    struct lmk_procprio params;
    bool is_system_server;
    struct passwd *pwdrec;

    lmkd_pack_get_procprio(packet, &params);

    if (params.oomadj < OOM_SCORE_ADJ_MIN ||
        params.oomadj > OOM_SCORE_ADJ_MAX) {
        ALOGE("Invalid PROCPRIO oomadj argument %d", params.oomadj);
        return;
    }

    /* gid containing AID_READPROC required */
    /* CAP_SYS_RESOURCE required */
    /* CAP_DAC_OVERRIDE required */
    snprintf(path, sizeof(path), "/proc/%d/oom_score_adj", params.pid);
    snprintf(val, sizeof(val), "%d", params.oomadj);
    if (!writefilestring(path, val, false)) {
        ALOGW("Failed to open %s; errno=%d: process %d might have been killed",
              path, errno, params.pid);
        /* If this file does not exist the process is dead. */
        return;
    }

    if (use_inkernel_interface) {
        return;
    }

    if (per_app_memcg) {
        /* soft limit is soft_limit_mult * 8MB */
        if (params.oomadj >= 700) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 600) {
            /*
             * Launcher should be perceptible, don't kill it.
             * Note that the value recorded below becomes 200 while the
             * kernel was already given the original value above.
             */
            params.oomadj = 200;
            soft_limit_mult = 1;
        } else if (params.oomadj >= 400) {
            soft_limit_mult = 0;
        } else if (params.oomadj >= 300) {
            soft_limit_mult = 1;
        } else if (params.oomadj >= 200) {
            soft_limit_mult = 8;
        } else if (params.oomadj >= 100) {
            soft_limit_mult = 10;
        } else if (params.oomadj >= 0) {
            soft_limit_mult = 20;
        } else {
            /* Persistent processes will have a large soft limit 512MB. */
            soft_limit_mult = 64;
        }

        snprintf(path, sizeof(path), MEMCG_SYSFS_PATH
                 "apps/uid_%d/pid_%d/memory.soft_limit_in_bytes",
                 params.uid, params.pid);
        snprintf(val, sizeof(val), "%d", soft_limit_mult * EIGHT_MEGA);

        /*
         * system_server process has no memcg under /dev/memcg/apps but should be
         * registered with lmkd. This is the best way so far to identify it.
         */
        is_system_server = (params.oomadj == SYSTEM_ADJ &&
                            (pwdrec = getpwnam("system")) != NULL &&
                            params.uid == pwdrec->pw_uid);
        /* a missing memcg file is expected, and not logged, for system_server */
        writefilestring(path, val, !is_system_server);
    }

    procp = pid_lookup(params.pid);
    if (procp) {
        /* already registered: move the record to the list of its new oomadj */
        proc_unslot(procp);
        procp->oomadj = params.oomadj;
        proc_slot(procp);
        return;
    }

    procp = malloc(sizeof(*procp));
    if (!procp) {
        /* Oh, the irony.  May need to rebuild our state. */
        return;
    }

    procp->pid = params.pid;
    procp->uid = params.uid;
    procp->oomadj = params.oomadj;
    proc_insert(procp);
}
649 
/*
 * Handle an LMK_PROCREMOVE request: drop the record of the given pid.
 * Does nothing when the in-kernel lowmemorykiller is in use. An
 * unregistered pid is ignored (the result of pid_remove() is not checked).
 */
static void cmd_procremove(LMKD_CTRL_PACKET packet) {
    struct lmk_procremove params;

    if (use_inkernel_interface) {
        return;
    }

    lmkd_pack_get_procremove(packet, &params);
    /*
     * WARNING: After pid_remove() procp is freed and can't be used!
     * Therefore placed at the end of the function.
     */
    pid_remove(params.pid);
}
664 
cmd_procpurge()665 static void cmd_procpurge() {
666     int i;
667     struct proc *procp;
668     struct proc *next;
669 
670     if (use_inkernel_interface) {
671         return;
672     }
673 
674     for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) {
675         procadjslot_list[i].next = &procadjslot_list[i];
676         procadjslot_list[i].prev = &procadjslot_list[i];
677     }
678 
679     for (i = 0; i < PIDHASH_SZ; i++) {
680         procp = pidhash[i];
681         while (procp) {
682             next = procp->pidhash_next;
683             free(procp);
684             procp = next;
685         }
686     }
687     memset(&pidhash[0], 0, sizeof(pidhash));
688 }
689 
inc_killcnt(int oomadj)690 static void inc_killcnt(int oomadj) {
691     int slot = ADJTOSLOT(oomadj);
692     uint8_t idx = killcnt_idx[slot];
693 
694     if (idx == KILLCNT_INVALID_IDX) {
695         /* index is not assigned for this oomadj */
696         if (killcnt_free_idx < MAX_DISTINCT_OOM_ADJ) {
697             killcnt_idx[slot] = killcnt_free_idx;
698             killcnt[killcnt_free_idx] = 1;
699             killcnt_free_idx++;
700         } else {
701             ALOGW("Number of distinct oomadj levels exceeds %d",
702                 MAX_DISTINCT_OOM_ADJ);
703         }
704     } else {
705         /*
706          * wraparound is highly unlikely and is detectable using total
707          * counter because it has to be equal to the sum of all counters
708          */
709         killcnt[idx]++;
710     }
711     /* increment total kill counter */
712     killcnt_total++;
713 }
714 
get_killcnt(int min_oomadj,int max_oomadj)715 static int get_killcnt(int min_oomadj, int max_oomadj) {
716     int slot;
717     int count = 0;
718 
719     if (min_oomadj > max_oomadj)
720         return 0;
721 
722     /* special case to get total kill count */
723     if (min_oomadj > OOM_SCORE_ADJ_MAX)
724         return killcnt_total;
725 
726     while (min_oomadj <= max_oomadj &&
727            (slot = ADJTOSLOT(min_oomadj)) < ADJTOSLOT_COUNT) {
728         uint8_t idx = killcnt_idx[slot];
729         if (idx != KILLCNT_INVALID_IDX) {
730             count += killcnt[idx];
731         }
732         min_oomadj++;
733     }
734 
735     return count;
736 }
737 
/*
 * Handle an LMK_GETKILLCNT request.
 * Returns the kill count for the oomadj range carried in the packet, or 0
 * when the in-kernel lowmemorykiller is in use.
 */
static int cmd_getkillcnt(LMKD_CTRL_PACKET packet) {
    struct lmk_getkillcnt params;

    if (use_inkernel_interface) {
        /* kernel driver does not expose this information */
        return 0;
    }

    lmkd_pack_get_getkillcnt(packet, &params);

    return get_killcnt(params.min_oomadj, params.max_oomadj);
}
750 
/*
 * Handle an LMK_TARGET request: install ntargets (minfree, oom_adj_score)
 * pairs.
 *
 * The pairs are stored in lowmem_minfree[]/lowmem_adj[], published as
 * "minfree:adj,..." in the sys.lmk.minfree_levels property and, when the
 * in-kernel lowmemorykiller module is present, written to its parameters
 * (zeros are written when lmkd is not using the in-kernel interface).
 *
 * Requests with an invalid ntargets, or arriving less than
 * TARGET_UPDATE_MIN_INTERVAL_MS after the previously accepted one, are
 * ignored.
 */
static void cmd_target(int ntargets, LMKD_CTRL_PACKET packet) {
    int i;
    struct lmk_target target;
    char minfree_str[PROPERTY_VALUE_MAX];
    char *pstr = minfree_str;
    char *pend = minfree_str + sizeof(minfree_str);
    static struct timespec last_req_tm;
    struct timespec curr_tm;

    if (ntargets < 1 || ntargets > (int)ARRAY_SIZE(lowmem_adj))
        return;

    /*
     * Ratelimit minfree updates to once per TARGET_UPDATE_MIN_INTERVAL_MS
     * to prevent DoS attacks
     */
    if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
        ALOGE("Failed to get current time");
        return;
    }

    if (get_time_diff_ms(&last_req_tm, &curr_tm) <
        TARGET_UPDATE_MIN_INTERVAL_MS) {
        ALOGE("Ignoring frequent updated to lmkd limits");
        return;
    }

    last_req_tm = curr_tm;

    for (i = 0; i < ntargets; i++) {
        int written;

        lmkd_pack_get_target(packet, i, &target);
        lowmem_minfree[i] = target.minfree;
        lowmem_adj[i] = target.oom_adj_score;

        if (pstr >= pend) {
            /*
             * The property string is full. Keep storing the remaining
             * targets: lowmem_targets_size below counts all of them.
             */
            continue;
        }

        written = snprintf(pstr, pend - pstr, "%d:%d,", target.minfree,
            target.oom_adj_score);
        if (written < 0) {
            /* formatting error: drop this entry from the property string */
            *pstr = '\0';
            pstr = pend;
        } else if (written >= pend - pstr) {
            /* truncated: no more space in the buffer */
            pstr = pend;
        } else {
            pstr += written;
        }
    }

    lowmem_targets_size = ntargets;

    /* Override the last extra comma */
    pstr[-1] = '\0';
    property_set("sys.lmk.minfree_levels", minfree_str);

    if (has_inkernel_module) {
        char minfreestr[128];
        char killpriostr[128];

        minfreestr[0] = '\0';
        killpriostr[0] = '\0';

        for (i = 0; i < lowmem_targets_size; i++) {
            char val[40];

            if (i) {
                strlcat(minfreestr, ",", sizeof(minfreestr));
                strlcat(killpriostr, ",", sizeof(killpriostr));
            }

            snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_minfree[i] : 0);
            strlcat(minfreestr, val, sizeof(minfreestr));
            snprintf(val, sizeof(val), "%d", use_inkernel_interface ? lowmem_adj[i] : 0);
            strlcat(killpriostr, val, sizeof(killpriostr));
        }

        writefilestring(INKERNEL_MINFREE_PATH, minfreestr, true);
        writefilestring(INKERNEL_ADJ_PATH, killpriostr, true);
    }
}
825 
ctrl_data_close(int dsock_idx)826 static void ctrl_data_close(int dsock_idx) {
827     struct epoll_event epev;
828 
829     ALOGI("closing lmkd data connection");
830     if (epoll_ctl(epollfd, EPOLL_CTL_DEL, data_sock[dsock_idx].sock, &epev) == -1) {
831         // Log a warning and keep going
832         ALOGW("epoll_ctl for data connection socket failed; errno=%d", errno);
833     }
834     maxevents--;
835 
836     close(data_sock[dsock_idx].sock);
837     data_sock[dsock_idx].sock = -1;
838 }
839 
ctrl_data_read(int dsock_idx,char * buf,size_t bufsz)840 static int ctrl_data_read(int dsock_idx, char *buf, size_t bufsz) {
841     int ret = 0;
842 
843     ret = TEMP_FAILURE_RETRY(read(data_sock[dsock_idx].sock, buf, bufsz));
844 
845     if (ret == -1) {
846         ALOGE("control data socket read failed; errno=%d", errno);
847     } else if (ret == 0) {
848         ALOGE("Got EOF on control data socket");
849         ret = -1;
850     }
851 
852     return ret;
853 }
854 
ctrl_data_write(int dsock_idx,char * buf,size_t bufsz)855 static int ctrl_data_write(int dsock_idx, char *buf, size_t bufsz) {
856     int ret = 0;
857 
858     ret = TEMP_FAILURE_RETRY(write(data_sock[dsock_idx].sock, buf, bufsz));
859 
860     if (ret == -1) {
861         ALOGE("control data socket write failed; errno=%d", errno);
862     } else if (ret == 0) {
863         ALOGE("Got EOF on control data socket");
864         ret = -1;
865     }
866 
867     return ret;
868 }
869 
ctrl_command_handler(int dsock_idx)870 static void ctrl_command_handler(int dsock_idx) {
871     LMKD_CTRL_PACKET packet;
872     int len;
873     enum lmk_cmd cmd;
874     int nargs;
875     int targets;
876     int kill_cnt;
877 
878     len = ctrl_data_read(dsock_idx, (char *)packet, CTRL_PACKET_MAX_SIZE);
879     if (len <= 0)
880         return;
881 
882     if (len < (int)sizeof(int)) {
883         ALOGE("Wrong control socket read length len=%d", len);
884         return;
885     }
886 
887     cmd = lmkd_pack_get_cmd(packet);
888     nargs = len / sizeof(int) - 1;
889     if (nargs < 0)
890         goto wronglen;
891 
892     switch(cmd) {
893     case LMK_TARGET:
894         targets = nargs / 2;
895         if (nargs & 0x1 || targets > (int)ARRAY_SIZE(lowmem_adj))
896             goto wronglen;
897         cmd_target(targets, packet);
898         break;
899     case LMK_PROCPRIO:
900         if (nargs != 3)
901             goto wronglen;
902         cmd_procprio(packet);
903         break;
904     case LMK_PROCREMOVE:
905         if (nargs != 1)
906             goto wronglen;
907         cmd_procremove(packet);
908         break;
909     case LMK_PROCPURGE:
910         if (nargs != 0)
911             goto wronglen;
912         cmd_procpurge();
913         break;
914     case LMK_GETKILLCNT:
915         if (nargs != 2)
916             goto wronglen;
917         kill_cnt = cmd_getkillcnt(packet);
918         len = lmkd_pack_set_getkillcnt_repl(packet, kill_cnt);
919         if (ctrl_data_write(dsock_idx, (char *)packet, len) != len)
920             return;
921         break;
922     default:
923         ALOGE("Received unknown command code %d", cmd);
924         return;
925     }
926 
927     return;
928 
929 wronglen:
930     ALOGE("Wrong control socket read length cmd=%d len=%d", cmd, len);
931 }
932 
/*
 * Event handler for a data connection. data is the connection index;
 * a command is processed only when the event mask contains EPOLLIN.
 */
static void ctrl_data_handler(int data, uint32_t events) {
    if (!(events & EPOLLIN)) {
        return;
    }
    ctrl_command_handler(data);
}
938 
get_free_dsock()939 static int get_free_dsock() {
940     for (int i = 0; i < MAX_DATA_CONN; i++) {
941         if (data_sock[i].sock < 0) {
942             return i;
943         }
944     }
945     return -1;
946 }
947 
/*
 * Event handler for the control listen socket: accept a new data
 * connection and add it to the epoll set.
 *
 * maxevents is incremented only after the socket has been added to the
 * epoll set. On a failed EPOLL_CTL_ADD the socket is closed directly:
 * ctrl_data_close() must not be used there, because it would decrement
 * maxevents for a socket that was never counted and issue EPOLL_CTL_DEL on
 * a descriptor that was never registered.
 */
static void ctrl_connect_handler(int data __unused, uint32_t events __unused) {
    struct epoll_event epev;
    int free_idx = get_free_dsock();

    if (free_idx < 0) {
        /*
         * Number of data connections exceeded max supported. This should not
         * happen but if it does we drop all existing connections and accept
         * the new one. This prevents inactive connections from monopolizing
         * data socket and if we drop ActivityManager connection it will
         * immediately reconnect.
         */
        for (int i = 0; i < MAX_DATA_CONN; i++) {
            ctrl_data_close(i);
        }
        free_idx = 0;
    }

    data_sock[free_idx].sock = accept(ctrl_sock.sock, NULL, NULL);
    if (data_sock[free_idx].sock < 0) {
        ALOGE("lmkd control socket accept failed; errno=%d", errno);
        return;
    }

    ALOGI("lmkd data connection established");
    /* use data to store data connection idx */
    data_sock[free_idx].handler_info.data = free_idx;
    data_sock[free_idx].handler_info.handler = ctrl_data_handler;
    epev.events = EPOLLIN;
    epev.data.ptr = (void *)&(data_sock[free_idx].handler_info);
    if (epoll_ctl(epollfd, EPOLL_CTL_ADD, data_sock[free_idx].sock, &epev) == -1) {
        ALOGE("epoll_ctl for data connection socket failed; errno=%d", errno);
        /* not registered and not counted yet: just release the socket */
        close(data_sock[free_idx].sock);
        data_sock[free_idx].sock = -1;
        return;
    }
    maxevents++;
}
985 
986 #ifdef LMKD_LOG_STATS
memory_stat_parse_line(char * line,struct memory_stat * mem_st)987 static void memory_stat_parse_line(char* line, struct memory_stat* mem_st) {
988     char key[LINE_MAX + 1];
989     int64_t value;
990 
991     sscanf(line, "%" STRINGIFY(LINE_MAX) "s  %" SCNd64 "", key, &value);
992 
993     if (strcmp(key, "total_") < 0) {
994         return;
995     }
996 
997     if (!strcmp(key, "total_pgfault"))
998         mem_st->pgfault = value;
999     else if (!strcmp(key, "total_pgmajfault"))
1000         mem_st->pgmajfault = value;
1001     else if (!strcmp(key, "total_rss"))
1002         mem_st->rss_in_bytes = value;
1003     else if (!strcmp(key, "total_cache"))
1004         mem_st->cache_in_bytes = value;
1005     else if (!strcmp(key, "total_swap"))
1006         mem_st->swap_in_bytes = value;
1007 }
1008 
memory_stat_from_cgroup(struct memory_stat * mem_st,int pid,uid_t uid)1009 static int memory_stat_from_cgroup(struct memory_stat* mem_st, int pid, uid_t uid) {
1010     FILE *fp;
1011     char buf[PATH_MAX];
1012 
1013     snprintf(buf, sizeof(buf), MEMCG_PROCESS_MEMORY_STAT_PATH, uid, pid);
1014 
1015     fp = fopen(buf, "r");
1016 
1017     if (fp == NULL) {
1018         ALOGE("%s open failed: %s", buf, strerror(errno));
1019         return -1;
1020     }
1021 
1022     while (fgets(buf, PAGE_SIZE, fp) != NULL) {
1023         memory_stat_parse_line(buf, mem_st);
1024     }
1025     fclose(fp);
1026 
1027     return 0;
1028 }
1029 
memory_stat_from_procfs(struct memory_stat * mem_st,int pid)1030 static int memory_stat_from_procfs(struct memory_stat* mem_st, int pid) {
1031     char path[PATH_MAX];
1032     char buffer[PROC_STAT_BUFFER_SIZE];
1033     int fd, ret;
1034 
1035     snprintf(path, sizeof(path), PROC_STAT_FILE_PATH, pid);
1036     if ((fd = open(path, O_RDONLY | O_CLOEXEC)) < 0) {
1037         ALOGE("%s open failed: %s", path, strerror(errno));
1038         return -1;
1039     }
1040 
1041     ret = read(fd, buffer, sizeof(buffer));
1042     if (ret < 0) {
1043         ALOGE("%s read failed: %s", path, strerror(errno));
1044         close(fd);
1045         return -1;
1046     }
1047     close(fd);
1048 
1049     // field 10 is pgfault
1050     // field 12 is pgmajfault
1051     // field 22 is starttime
1052     // field 24 is rss_in_pages
1053     int64_t pgfault = 0, pgmajfault = 0, starttime = 0, rss_in_pages = 0;
1054     if (sscanf(buffer,
1055                "%*u %*s %*s %*d %*d %*d %*d %*d %*d %" SCNd64 " %*d "
1056                "%" SCNd64 " %*d %*u %*u %*d %*d %*d %*d %*d %*d "
1057                "%" SCNd64 " %*d %" SCNd64 "",
1058                &pgfault, &pgmajfault, &starttime, &rss_in_pages) != 4) {
1059         return -1;
1060     }
1061     mem_st->pgfault = pgfault;
1062     mem_st->pgmajfault = pgmajfault;
1063     mem_st->rss_in_bytes = (rss_in_pages * PAGE_SIZE);
1064     mem_st->process_start_time_ns = starttime * (NS_PER_SEC / sysconf(_SC_CLK_TCK));
1065     return 0;
1066 }
1067 #endif
1068 
1069 /* /proc/zoneinfo parsing routines */
/*
 * Return the largest value found in a zoneinfo "protection:" list such as
 * " (0, 2815, 2815, 2815"; never less than 0.
 *
 * The string is split in place: the delimiter ending each token is
 * overwritten with NUL. The first token is delimited by any of "(), ",
 * later ones by any of "), ".
 */
static int64_t zoneinfo_parse_protection(char *cp) {
    const char *delims = "(), ";
    int64_t largest = 0;
    char *pos = cp;

    for (;;) {
        char *token;
        long long zoneval;

        /* skip leading delimiters; stop once the string is exhausted */
        pos += strspn(pos, delims);
        if (*pos == '\0') {
            break;
        }

        /* isolate the token by terminating it at the next delimiter */
        token = pos;
        pos += strcspn(pos, delims);
        if (*pos != '\0') {
            *pos = '\0';
            pos++;
        }

        zoneval = strtoll(token, NULL, 0);
        if (zoneval > largest) {
            largest = zoneval;
        }

        delims = "), ";
    }

    return largest;
}
1085 
zoneinfo_parse_line(char * line,union zoneinfo * zi)1086 static bool zoneinfo_parse_line(char *line, union zoneinfo *zi) {
1087     char *cp = line;
1088     char *ap;
1089     char *save_ptr;
1090     int64_t val;
1091     int field_idx;
1092 
1093     cp = strtok_r(line, " ", &save_ptr);
1094     if (!cp) {
1095         return true;
1096     }
1097 
1098     if (!strcmp(cp, "protection:")) {
1099         ap = strtok_r(NULL, ")", &save_ptr);
1100     } else {
1101         ap = strtok_r(NULL, " ", &save_ptr);
1102     }
1103 
1104     if (!ap) {
1105         return true;
1106     }
1107 
1108     switch (match_field(cp, ap, zoneinfo_field_names,
1109                         ZI_FIELD_COUNT, &val, &field_idx)) {
1110     case (PARSE_SUCCESS):
1111         zi->arr[field_idx] += val;
1112         break;
1113     case (NO_MATCH):
1114         if (!strcmp(cp, "protection:")) {
1115             zi->field.totalreserve_pages +=
1116                 zoneinfo_parse_protection(ap);
1117         }
1118         break;
1119     case (PARSE_FAIL):
1120     default:
1121         return false;
1122     }
1123     return true;
1124 }
1125 
zoneinfo_parse(union zoneinfo * zi)1126 static int zoneinfo_parse(union zoneinfo *zi) {
1127     static struct reread_data file_data = {
1128         .filename = ZONEINFO_PATH,
1129         .fd = -1,
1130     };
1131     char buf[PAGE_SIZE];
1132     char *save_ptr;
1133     char *line;
1134 
1135     memset(zi, 0, sizeof(union zoneinfo));
1136 
1137     if (reread_file(&file_data, buf, sizeof(buf)) < 0) {
1138         return -1;
1139     }
1140 
1141     for (line = strtok_r(buf, "\n", &save_ptr); line;
1142          line = strtok_r(NULL, "\n", &save_ptr)) {
1143         if (!zoneinfo_parse_line(line, zi)) {
1144             ALOGE("%s parse error", file_data.filename);
1145             return -1;
1146         }
1147     }
1148     zi->field.totalreserve_pages += zi->field.high;
1149 
1150     return 0;
1151 }
1152 
1153 /* /proc/meminfo parsing routines */
meminfo_parse_line(char * line,union meminfo * mi)1154 static bool meminfo_parse_line(char *line, union meminfo *mi) {
1155     char *cp = line;
1156     char *ap;
1157     char *save_ptr;
1158     int64_t val;
1159     int field_idx;
1160     enum field_match_result match_res;
1161 
1162     cp = strtok_r(line, " ", &save_ptr);
1163     if (!cp) {
1164         return false;
1165     }
1166 
1167     ap = strtok_r(NULL, " ", &save_ptr);
1168     if (!ap) {
1169         return false;
1170     }
1171 
1172     match_res = match_field(cp, ap, meminfo_field_names, MI_FIELD_COUNT,
1173         &val, &field_idx);
1174     if (match_res == PARSE_SUCCESS) {
1175         mi->arr[field_idx] = val / page_k;
1176     }
1177     return (match_res != PARSE_FAIL);
1178 }
1179 
meminfo_parse(union meminfo * mi)1180 static int meminfo_parse(union meminfo *mi) {
1181     static struct reread_data file_data = {
1182         .filename = MEMINFO_PATH,
1183         .fd = -1,
1184     };
1185     char buf[PAGE_SIZE];
1186     char *save_ptr;
1187     char *line;
1188 
1189     memset(mi, 0, sizeof(union meminfo));
1190 
1191     if (reread_file(&file_data, buf, sizeof(buf)) < 0) {
1192         return -1;
1193     }
1194 
1195     for (line = strtok_r(buf, "\n", &save_ptr); line;
1196          line = strtok_r(NULL, "\n", &save_ptr)) {
1197         if (!meminfo_parse_line(line, mi)) {
1198             ALOGE("%s parse error", file_data.filename);
1199             return -1;
1200         }
1201     }
1202     mi->field.nr_file_pages = mi->field.cached + mi->field.swap_cached +
1203         mi->field.buffers;
1204 
1205     return 0;
1206 }
1207 
meminfo_log(union meminfo * mi)1208 static void meminfo_log(union meminfo *mi) {
1209     for (int field_idx = 0; field_idx < MI_FIELD_COUNT; field_idx++) {
1210         android_log_write_int32(ctx, (int32_t)min(mi->arr[field_idx] * page_k, INT32_MAX));
1211     }
1212 
1213     android_log_write_list(ctx, LOG_ID_EVENTS);
1214     android_log_reset(ctx);
1215 }
1216 
proc_get_size(int pid)1217 static int proc_get_size(int pid) {
1218     char path[PATH_MAX];
1219     char line[LINE_MAX];
1220     int fd;
1221     int rss = 0;
1222     int total;
1223     ssize_t ret;
1224 
1225     /* gid containing AID_READPROC required */
1226     snprintf(path, PATH_MAX, "/proc/%d/statm", pid);
1227     fd = open(path, O_RDONLY | O_CLOEXEC);
1228     if (fd == -1)
1229         return -1;
1230 
1231     ret = read_all(fd, line, sizeof(line) - 1);
1232     if (ret < 0) {
1233         close(fd);
1234         return -1;
1235     }
1236 
1237     sscanf(line, "%d %d ", &total, &rss);
1238     close(fd);
1239     return rss;
1240 }
1241 
proc_get_name(int pid)1242 static char *proc_get_name(int pid) {
1243     char path[PATH_MAX];
1244     static char line[LINE_MAX];
1245     int fd;
1246     char *cp;
1247     ssize_t ret;
1248 
1249     /* gid containing AID_READPROC required */
1250     snprintf(path, PATH_MAX, "/proc/%d/cmdline", pid);
1251     fd = open(path, O_RDONLY | O_CLOEXEC);
1252     if (fd == -1)
1253         return NULL;
1254     ret = read_all(fd, line, sizeof(line) - 1);
1255     close(fd);
1256     if (ret < 0) {
1257         return NULL;
1258     }
1259 
1260     cp = strchr(line, ' ');
1261     if (cp)
1262         *cp = '\0';
1263 
1264     return line;
1265 }
1266 
proc_adj_lru(int oomadj)1267 static struct proc *proc_adj_lru(int oomadj) {
1268     return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
1269 }
1270 
proc_get_heaviest(int oomadj)1271 static struct proc *proc_get_heaviest(int oomadj) {
1272     struct adjslot_list *head = &procadjslot_list[ADJTOSLOT(oomadj)];
1273     struct adjslot_list *curr = head->next;
1274     struct proc *maxprocp = NULL;
1275     int maxsize = 0;
1276     while (curr != head) {
1277         int pid = ((struct proc *)curr)->pid;
1278         int tasksize = proc_get_size(pid);
1279         if (tasksize <= 0) {
1280             struct adjslot_list *next = curr->next;
1281             pid_remove(pid);
1282             curr = next;
1283         } else {
1284             if (tasksize > maxsize) {
1285                 maxsize = tasksize;
1286                 maxprocp = (struct proc *)curr;
1287             }
1288             curr = curr->next;
1289         }
1290     }
1291     return maxprocp;
1292 }
1293 
/*
 * Apply priority prio and cpuset policy sp to every thread listed under
 * /proc/<pid>/task. Failures are logged and do not stop the walk.
 */
static void set_process_group_and_prio(int pid, SchedPolicy sp, int prio) {
    char task_dir[PATH_MAX];
    struct dirent* entry;
    DIR* dir;

    snprintf(task_dir, sizeof(task_dir), "/proc/%d/task", pid);
    dir = opendir(task_dir);
    if (dir == NULL) {
        ALOGW("Failed to open %s; errno=%d: process pid(%d) might have died", task_dir, errno,
              pid);
        return;
    }

    for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
        int tid;

        /* skip ".", ".." and any other dot entry */
        if (entry->d_name[0] == '.') {
            continue;
        }

        tid = atoi(entry->d_name);
        if (tid == 0) {
            ALOGW("Failed to get t_pid for '%s' of pid(%d)", entry->d_name, pid);
            continue;
        }

        /* a thread that has already exited (ESRCH) is not worth a warning */
        if (setpriority(PRIO_PROCESS, tid, prio) != 0 && errno != ESRCH) {
            ALOGW("Unable to raise priority of killing t_pid (%d): errno=%d", tid, errno);
        }

        if (set_cpuset_policy(tid, sp) != 0) {
            ALOGW("Failed to set_cpuset_policy on pid(%d) t_pid(%d) to %d", pid, tid, (int)sp);
        }
    }

    closedir(dir);
}
1328 
1329 static int last_killed_pid = -1;
1330 
1331 /* Kill one process specified by procp.  Returns the size of the process killed */
kill_one_process(struct proc * procp,int min_oom_score)1332 static int kill_one_process(struct proc* procp, int min_oom_score) {
1333     int pid = procp->pid;
1334     uid_t uid = procp->uid;
1335     char *taskname;
1336     int tasksize;
1337     int r;
1338     int result = -1;
1339 
1340 #ifdef LMKD_LOG_STATS
1341     struct memory_stat mem_st = {};
1342     int memory_stat_parse_result = -1;
1343 #else
1344     /* To prevent unused parameter warning */
1345     (void)(min_oom_score);
1346 #endif
1347 
1348     taskname = proc_get_name(pid);
1349     if (!taskname) {
1350         goto out;
1351     }
1352 
1353     tasksize = proc_get_size(pid);
1354     if (tasksize <= 0) {
1355         goto out;
1356     }
1357 
1358 #ifdef LMKD_LOG_STATS
1359     if (enable_stats_log) {
1360         if (per_app_memcg) {
1361             memory_stat_parse_result = memory_stat_from_cgroup(&mem_st, pid, uid);
1362         } else {
1363             memory_stat_parse_result = memory_stat_from_procfs(&mem_st, pid);
1364         }
1365     }
1366 #endif
1367 
1368     TRACE_KILL_START(pid);
1369 
1370     /* CAP_KILL required */
1371     r = kill(pid, SIGKILL);
1372 
1373     set_process_group_and_prio(pid, SP_FOREGROUND, ANDROID_PRIORITY_HIGHEST);
1374 
1375     inc_killcnt(procp->oomadj);
1376     ALOGE("Kill '%s' (%d), uid %d, oom_adj %d to free %ldkB", taskname, pid, uid, procp->oomadj,
1377           tasksize * page_k);
1378 
1379     TRACE_KILL_END();
1380 
1381     last_killed_pid = pid;
1382 
1383     if (r) {
1384         ALOGE("kill(%d): errno=%d", pid, errno);
1385         goto out;
1386     } else {
1387 #ifdef LMKD_LOG_STATS
1388         if (memory_stat_parse_result == 0) {
1389             stats_write_lmk_kill_occurred(log_ctx, LMK_KILL_OCCURRED, uid, taskname,
1390                     procp->oomadj, mem_st.pgfault, mem_st.pgmajfault, mem_st.rss_in_bytes,
1391                     mem_st.cache_in_bytes, mem_st.swap_in_bytes, mem_st.process_start_time_ns,
1392                     min_oom_score);
1393         } else if (enable_stats_log) {
1394             stats_write_lmk_kill_occurred(log_ctx, LMK_KILL_OCCURRED, uid, taskname, procp->oomadj,
1395                                           -1, -1, tasksize * BYTES_IN_KILOBYTE, -1, -1, -1,
1396                                           min_oom_score);
1397         }
1398 #endif
1399         result = tasksize;
1400     }
1401 
1402 out:
1403     /*
1404      * WARNING: After pid_remove() procp is freed and can't be used!
1405      * Therefore placed at the end of the function.
1406      */
1407     pid_remove(pid);
1408     return result;
1409 }
1410 
1411 /*
1412  * Find one process to kill at or above the given oom_adj level.
1413  * Returns size of the killed process.
1414  */
find_and_kill_process(int min_score_adj)1415 static int find_and_kill_process(int min_score_adj) {
1416     int i;
1417     int killed_size = 0;
1418 
1419 #ifdef LMKD_LOG_STATS
1420     bool lmk_state_change_start = false;
1421 #endif
1422 
1423     for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
1424         struct proc *procp;
1425 
1426         while (true) {
1427             procp = kill_heaviest_task ?
1428                 proc_get_heaviest(i) : proc_adj_lru(i);
1429 
1430             if (!procp)
1431                 break;
1432 
1433             killed_size = kill_one_process(procp, min_score_adj);
1434             if (killed_size >= 0) {
1435 #ifdef LMKD_LOG_STATS
1436                 if (enable_stats_log && !lmk_state_change_start) {
1437                     lmk_state_change_start = true;
1438                     stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED,
1439                                                   LMK_STATE_CHANGE_START);
1440                 }
1441 #endif
1442                 break;
1443             }
1444         }
1445         if (killed_size) {
1446             break;
1447         }
1448     }
1449 
1450 #ifdef LMKD_LOG_STATS
1451     if (enable_stats_log && lmk_state_change_start) {
1452         stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED, LMK_STATE_CHANGE_STOP);
1453     }
1454 #endif
1455 
1456     return killed_size;
1457 }
1458 
get_memory_usage(struct reread_data * file_data)1459 static int64_t get_memory_usage(struct reread_data *file_data) {
1460     int ret;
1461     int64_t mem_usage;
1462     char buf[32];
1463 
1464     if (reread_file(file_data, buf, sizeof(buf)) < 0) {
1465         return -1;
1466     }
1467 
1468     if (!parse_int64(buf, &mem_usage)) {
1469         ALOGE("%s parse error", file_data->filename);
1470         return -1;
1471     }
1472     if (mem_usage == 0) {
1473         ALOGE("No memory!");
1474         return -1;
1475     }
1476     return mem_usage;
1477 }
1478 
record_low_pressure_levels(union meminfo * mi)1479 void record_low_pressure_levels(union meminfo *mi) {
1480     if (low_pressure_mem.min_nr_free_pages == -1 ||
1481         low_pressure_mem.min_nr_free_pages > mi->field.nr_free_pages) {
1482         if (debug_process_killing) {
1483             ALOGI("Low pressure min memory update from %" PRId64 " to %" PRId64,
1484                 low_pressure_mem.min_nr_free_pages, mi->field.nr_free_pages);
1485         }
1486         low_pressure_mem.min_nr_free_pages = mi->field.nr_free_pages;
1487     }
1488     /*
1489      * Free memory at low vmpressure events occasionally gets spikes,
1490      * possibly a stale low vmpressure event with memory already
1491      * freed up (no memory pressure should have been reported).
1492      * Ignore large jumps in max_nr_free_pages that would mess up our stats.
1493      */
1494     if (low_pressure_mem.max_nr_free_pages == -1 ||
1495         (low_pressure_mem.max_nr_free_pages < mi->field.nr_free_pages &&
1496          mi->field.nr_free_pages - low_pressure_mem.max_nr_free_pages <
1497          low_pressure_mem.max_nr_free_pages * 0.1)) {
1498         if (debug_process_killing) {
1499             ALOGI("Low pressure max memory update from %" PRId64 " to %" PRId64,
1500                 low_pressure_mem.max_nr_free_pages, mi->field.nr_free_pages);
1501         }
1502         low_pressure_mem.max_nr_free_pages = mi->field.nr_free_pages;
1503     }
1504 }
1505 
upgrade_level(enum vmpressure_level level)1506 enum vmpressure_level upgrade_level(enum vmpressure_level level) {
1507     return (enum vmpressure_level)((level < VMPRESS_LEVEL_CRITICAL) ?
1508         level + 1 : level);
1509 }
1510 
downgrade_level(enum vmpressure_level level)1511 enum vmpressure_level downgrade_level(enum vmpressure_level level) {
1512     return (enum vmpressure_level)((level > VMPRESS_LEVEL_LOW) ?
1513         level - 1 : level);
1514 }
1515 
is_kill_pending(void)1516 static bool is_kill_pending(void) {
1517     char buf[24];
1518 
1519     if (last_killed_pid < 0) {
1520         return false;
1521     }
1522 
1523     snprintf(buf, sizeof(buf), "/proc/%d/", last_killed_pid);
1524     if (access(buf, F_OK) == 0) {
1525         return true;
1526     }
1527 
1528     // reset last killed PID because there's nothing pending
1529     last_killed_pid = -1;
1530     return false;
1531 }
1532 
/*
 * Common handler for memory pressure events.
 *
 * data carries the pressure level the event was registered for; events is
 * unused. The handler
 *   1. when PSI monitors are not in use, drains all vmpressure eventfds and
 *      raises the level to the highest one that fired;
 *   2. skips the event while a recent kill is still within kill_timeout_ms
 *      (on low-RAM devices, or while the victim's /proc entry exists);
 *   3. reads meminfo and zoneinfo, then picks the minimum oom_adj to kill
 *      at, either from the minfree table (use_minfree_levels) or from the
 *      pressure level, which may be upgraded, downgraded or ignored based
 *      on the memory/memory+swap usage ratio and free swap;
 *   4. kills one process and logs the result.
 */
static void mp_event_common(int data, uint32_t events __unused) {
    unsigned long long evcount;
    int64_t mem_usage, memsw_usage;
    int64_t mem_pressure;
    enum vmpressure_level lvl;
    union meminfo mi;
    union zoneinfo zi;
    struct timespec curr_tm;
    static struct timespec last_kill_tm;
    static unsigned long kill_skip_count = 0;
    enum vmpressure_level level = (enum vmpressure_level)data;
    long other_free = 0, other_file = 0;
    /* OOM_SCORE_ADJ_MAX + 1 means "no level qualifies for killing" */
    int min_score_adj = OOM_SCORE_ADJ_MAX + 1;
    int minfree = 0;
    static struct reread_data mem_usage_file_data = {
        .filename = MEMCG_MEMORY_USAGE,
        .fd = -1,
    };
    static struct reread_data memsw_usage_file_data = {
        .filename = MEMCG_MEMORYSW_USAGE,
        .fd = -1,
    };

    if (debug_process_killing) {
        ALOGI("%s memory pressure event is triggered", level_name[level]);
    }

    if (!use_psi_monitors) {
        /*
         * Check all event counters from low to critical
         * and upgrade to the highest priority one. By reading
         * eventfd we also reset the event counters.
         */
        for (lvl = VMPRESS_LEVEL_LOW; lvl < VMPRESS_LEVEL_COUNT; lvl++) {
            if (mpevfd[lvl] != -1 &&
                TEMP_FAILURE_RETRY(read(mpevfd[lvl],
                                   &evcount, sizeof(evcount))) > 0 &&
                evcount > 0 && lvl > level) {
                level = lvl;
            }
        }
    }

    if (clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm) != 0) {
        ALOGE("Failed to get current time");
        return;
    }

    if (kill_timeout_ms) {
        // If we're within the timeout, see if there's pending reclaim work
        // from the last killed process. If there is (as evidenced by
        // /proc/<pid> continuing to exist), skip killing for now.
        if ((get_time_diff_ms(&last_kill_tm, &curr_tm) < kill_timeout_ms) &&
            (low_ram_device || is_kill_pending())) {
            kill_skip_count++;
            return;
        }
    }

    if (kill_skip_count > 0) {
        ALOGI("%lu memory pressure events were skipped after a kill!",
              kill_skip_count);
        kill_skip_count = 0;
    }

    if (meminfo_parse(&mi) < 0 || zoneinfo_parse(&zi) < 0) {
        ALOGE("Failed to get free memory!");
        return;
    }

    if (use_minfree_levels) {
        int i;

        other_free = mi.field.nr_free_pages - zi.field.totalreserve_pages;
        if (mi.field.nr_file_pages > (mi.field.shmem + mi.field.unevictable + mi.field.swap_cached)) {
            other_file = (mi.field.nr_file_pages - mi.field.shmem -
                          mi.field.unevictable - mi.field.swap_cached);
        } else {
            other_file = 0;
        }

        /* pick the first target whose threshold both free and file pages are below */
        min_score_adj = OOM_SCORE_ADJ_MAX + 1;
        for (i = 0; i < lowmem_targets_size; i++) {
            minfree = lowmem_minfree[i];
            if (other_free < minfree && other_file < minfree) {
                min_score_adj = lowmem_adj[i];
                break;
            }
        }

        if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
            if (debug_process_killing) {
                /* no targets configured: report 0 instead of indexing [-1] */
                long limit_pages = (lowmem_targets_size > 0) ?
                    (long)lowmem_minfree[lowmem_targets_size - 1] : 0;

                ALOGI("Ignore %s memory pressure event "
                      "(free memory=%ldkB, cache=%ldkB, limit=%ldkB)",
                      level_name[level], other_free * page_k, other_file * page_k,
                      limit_pages * page_k);
            }
            return;
        }

        goto do_kill;
    }

    if (level == VMPRESS_LEVEL_LOW) {
        record_low_pressure_levels(&mi);
    }

    if (level_oomadj[level] > OOM_SCORE_ADJ_MAX) {
        /* Do not monitor this pressure level */
        return;
    }

    /* if the usage counters cannot be read, kill at the current level */
    if ((mem_usage = get_memory_usage(&mem_usage_file_data)) < 0) {
        goto do_kill;
    }
    if ((memsw_usage = get_memory_usage(&memsw_usage_file_data)) < 0) {
        goto do_kill;
    }

    // Calculate percent for swappinness.
    mem_pressure = (mem_usage * 100) / memsw_usage;

    if (enable_pressure_upgrade && level != VMPRESS_LEVEL_CRITICAL) {
        // We are swapping too much.
        if (mem_pressure < upgrade_pressure) {
            level = upgrade_level(level);
            if (debug_process_killing) {
                ALOGI("Event upgraded to %s", level_name[level]);
            }
        }
    }

    // If we still have enough swap space available, check if we want to
    // ignore/downgrade pressure events.
    if (mi.field.free_swap >=
        mi.field.total_swap * swap_free_low_percentage / 100) {
        // If the pressure is larger than downgrade_pressure lmk will not
        // kill any process, since enough memory is available.
        if (mem_pressure > downgrade_pressure) {
            if (debug_process_killing) {
                ALOGI("Ignore %s memory pressure", level_name[level]);
            }
            return;
        } else if (level == VMPRESS_LEVEL_CRITICAL && mem_pressure > upgrade_pressure) {
            if (debug_process_killing) {
                ALOGI("Downgrade critical memory pressure");
            }
            // Downgrade event, since enough memory available.
            level = downgrade_level(level);
        }
    }

do_kill:
    if (low_ram_device) {
        /*
         * For Go devices kill only one task. When minfree levels are in
         * use, honor the oom_adj selected from the minfree table above
         * instead of the per-level default.
         */
        if (find_and_kill_process(use_minfree_levels ?
                                  min_score_adj : level_oomadj[level]) == 0) {
            if (debug_process_killing) {
                ALOGI("Nothing to kill");
            }
        } else {
            meminfo_log(&mi);
        }
    } else {
        int pages_freed;
        static struct timespec last_report_tm;
        static unsigned long report_skip_count = 0;

        if (!use_minfree_levels) {
            /* Free up enough memory to downgrade the memory pressure to low level */
            if (mi.field.nr_free_pages >= low_pressure_mem.max_nr_free_pages) {
                if (debug_process_killing) {
                    ALOGI("Ignoring pressure since more memory is "
                        "available (%" PRId64 ") than watermark (%" PRId64 ")",
                        mi.field.nr_free_pages, low_pressure_mem.max_nr_free_pages);
                }
                return;
            }
            min_score_adj = level_oomadj[level];
        }

        pages_freed = find_and_kill_process(min_score_adj);

        if (pages_freed == 0) {
            /* Rate limit kill reports when nothing was reclaimed */
            if (get_time_diff_ms(&last_report_tm, &curr_tm) < FAIL_REPORT_RLIMIT_MS) {
                report_skip_count++;
                return;
            }
        } else {
            /* If we killed anything, update the last killed timestamp. */
            last_kill_tm = curr_tm;
        }

        /* Log meminfo whenever we kill or when report rate limit allows */
        meminfo_log(&mi);

        if (use_minfree_levels) {
            ALOGI("Reclaimed %ldkB, cache(%ldkB) and "
                "free(%" PRId64 "kB)-reserved(%" PRId64 "kB) below min(%ldkB) for oom_adj %d",
                pages_freed * page_k,
                other_file * page_k, mi.field.nr_free_pages * page_k,
                zi.field.totalreserve_pages * page_k,
                minfree * page_k, min_score_adj);
        } else {
            ALOGI("Reclaimed %ldkB at oom_adj %d",
                pages_freed * page_k, min_score_adj);
        }

        if (report_skip_count > 0) {
            ALOGI("Suppressed %lu failed kill reports", report_skip_count);
            report_skip_count = 0;
        }

        last_report_tm = curr_tm;
    }
}
1750 
init_mp_psi(enum vmpressure_level level)1751 static bool init_mp_psi(enum vmpressure_level level) {
1752     int fd = init_psi_monitor(psi_thresholds[level].stall_type,
1753         psi_thresholds[level].threshold_ms * US_PER_MS,
1754         PSI_WINDOW_SIZE_MS * US_PER_MS);
1755 
1756     if (fd < 0) {
1757         return false;
1758     }
1759 
1760     vmpressure_hinfo[level].handler = mp_event_common;
1761     vmpressure_hinfo[level].data = level;
1762     if (register_psi_monitor(epollfd, fd, &vmpressure_hinfo[level]) < 0) {
1763         destroy_psi_monitor(fd);
1764         return false;
1765     }
1766     maxevents++;
1767     mpevfd[level] = fd;
1768 
1769     return true;
1770 }
1771 
destroy_mp_psi(enum vmpressure_level level)1772 static void destroy_mp_psi(enum vmpressure_level level) {
1773     int fd = mpevfd[level];
1774 
1775     if (unregister_psi_monitor(epollfd, fd) < 0) {
1776         ALOGE("Failed to unregister psi monitor for %s memory pressure; errno=%d",
1777             level_name[level], errno);
1778     }
1779     destroy_psi_monitor(fd);
1780     mpevfd[level] = -1;
1781 }
1782 
init_psi_monitors()1783 static bool init_psi_monitors() {
1784     if (!init_mp_psi(VMPRESS_LEVEL_LOW)) {
1785         return false;
1786     }
1787     if (!init_mp_psi(VMPRESS_LEVEL_MEDIUM)) {
1788         destroy_mp_psi(VMPRESS_LEVEL_LOW);
1789         return false;
1790     }
1791     if (!init_mp_psi(VMPRESS_LEVEL_CRITICAL)) {
1792         destroy_mp_psi(VMPRESS_LEVEL_MEDIUM);
1793         destroy_mp_psi(VMPRESS_LEVEL_LOW);
1794         return false;
1795     }
1796     return true;
1797 }
1798 
init_mp_common(enum vmpressure_level level)1799 static bool init_mp_common(enum vmpressure_level level) {
1800     int mpfd;
1801     int evfd;
1802     int evctlfd;
1803     char buf[256];
1804     struct epoll_event epev;
1805     int ret;
1806     int level_idx = (int)level;
1807     const char *levelstr = level_name[level_idx];
1808 
1809     /* gid containing AID_SYSTEM required */
1810     mpfd = open(MEMCG_SYSFS_PATH "memory.pressure_level", O_RDONLY | O_CLOEXEC);
1811     if (mpfd < 0) {
1812         ALOGI("No kernel memory.pressure_level support (errno=%d)", errno);
1813         goto err_open_mpfd;
1814     }
1815 
1816     evctlfd = open(MEMCG_SYSFS_PATH "cgroup.event_control", O_WRONLY | O_CLOEXEC);
1817     if (evctlfd < 0) {
1818         ALOGI("No kernel memory cgroup event control (errno=%d)", errno);
1819         goto err_open_evctlfd;
1820     }
1821 
1822     evfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
1823     if (evfd < 0) {
1824         ALOGE("eventfd failed for level %s; errno=%d", levelstr, errno);
1825         goto err_eventfd;
1826     }
1827 
1828     ret = snprintf(buf, sizeof(buf), "%d %d %s", evfd, mpfd, levelstr);
1829     if (ret >= (ssize_t)sizeof(buf)) {
1830         ALOGE("cgroup.event_control line overflow for level %s", levelstr);
1831         goto err;
1832     }
1833 
1834     ret = TEMP_FAILURE_RETRY(write(evctlfd, buf, strlen(buf) + 1));
1835     if (ret == -1) {
1836         ALOGE("cgroup.event_control write failed for level %s; errno=%d",
1837               levelstr, errno);
1838         goto err;
1839     }
1840 
1841     epev.events = EPOLLIN;
1842     /* use data to store event level */
1843     vmpressure_hinfo[level_idx].data = level_idx;
1844     vmpressure_hinfo[level_idx].handler = mp_event_common;
1845     epev.data.ptr = (void *)&vmpressure_hinfo[level_idx];
1846     ret = epoll_ctl(epollfd, EPOLL_CTL_ADD, evfd, &epev);
1847     if (ret == -1) {
1848         ALOGE("epoll_ctl for level %s failed; errno=%d", levelstr, errno);
1849         goto err;
1850     }
1851     maxevents++;
1852     mpevfd[level] = evfd;
1853     close(evctlfd);
1854     return true;
1855 
1856 err:
1857     close(evfd);
1858 err_eventfd:
1859     close(evctlfd);
1860 err_open_evctlfd:
1861     close(mpfd);
1862 err_open_mpfd:
1863     return false;
1864 }
1865 
init(void)1866 static int init(void) {
1867     struct epoll_event epev;
1868     int i;
1869     int ret;
1870 
1871     page_k = sysconf(_SC_PAGESIZE);
1872     if (page_k == -1)
1873         page_k = PAGE_SIZE;
1874     page_k /= 1024;
1875 
1876     epollfd = epoll_create(MAX_EPOLL_EVENTS);
1877     if (epollfd == -1) {
1878         ALOGE("epoll_create failed (errno=%d)", errno);
1879         return -1;
1880     }
1881 
1882     // mark data connections as not connected
1883     for (int i = 0; i < MAX_DATA_CONN; i++) {
1884         data_sock[i].sock = -1;
1885     }
1886 
1887     ctrl_sock.sock = android_get_control_socket("lmkd");
1888     if (ctrl_sock.sock < 0) {
1889         ALOGE("get lmkd control socket failed");
1890         return -1;
1891     }
1892 
1893     ret = listen(ctrl_sock.sock, MAX_DATA_CONN);
1894     if (ret < 0) {
1895         ALOGE("lmkd control socket listen failed (errno=%d)", errno);
1896         return -1;
1897     }
1898 
1899     epev.events = EPOLLIN;
1900     ctrl_sock.handler_info.handler = ctrl_connect_handler;
1901     epev.data.ptr = (void *)&(ctrl_sock.handler_info);
1902     if (epoll_ctl(epollfd, EPOLL_CTL_ADD, ctrl_sock.sock, &epev) == -1) {
1903         ALOGE("epoll_ctl for lmkd control socket failed (errno=%d)", errno);
1904         return -1;
1905     }
1906     maxevents++;
1907 
1908     has_inkernel_module = !access(INKERNEL_MINFREE_PATH, W_OK);
1909     use_inkernel_interface = has_inkernel_module;
1910 
1911     if (use_inkernel_interface) {
1912         ALOGI("Using in-kernel low memory killer interface");
1913     } else {
1914         /* Try to use psi monitor first if kernel has it */
1915         use_psi_monitors = property_get_bool("ro.lmk.use_psi", true) &&
1916             init_psi_monitors();
1917         /* Fall back to vmpressure */
1918         if (!use_psi_monitors &&
1919             (!init_mp_common(VMPRESS_LEVEL_LOW) ||
1920             !init_mp_common(VMPRESS_LEVEL_MEDIUM) ||
1921             !init_mp_common(VMPRESS_LEVEL_CRITICAL))) {
1922             ALOGE("Kernel does not support memory pressure events or in-kernel low memory killer");
1923             return -1;
1924         }
1925         if (use_psi_monitors) {
1926             ALOGI("Using psi monitors for memory pressure detection");
1927         } else {
1928             ALOGI("Using vmpressure for memory pressure detection");
1929         }
1930     }
1931 
1932     for (i = 0; i <= ADJTOSLOT(OOM_SCORE_ADJ_MAX); i++) {
1933         procadjslot_list[i].next = &procadjslot_list[i];
1934         procadjslot_list[i].prev = &procadjslot_list[i];
1935     }
1936 
1937     memset(killcnt_idx, KILLCNT_INVALID_IDX, sizeof(killcnt_idx));
1938 
1939     return 0;
1940 }
1941 
mainloop(void)1942 static void mainloop(void) {
1943     struct event_handler_info* handler_info;
1944     struct event_handler_info* poll_handler = NULL;
1945     struct timespec last_report_tm, curr_tm;
1946     struct epoll_event *evt;
1947     long delay = -1;
1948     int polling = 0;
1949 
1950     while (1) {
1951         struct epoll_event events[maxevents];
1952         int nevents;
1953         int i;
1954 
1955         if (polling) {
1956             /* Calculate next timeout */
1957             clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
1958             delay = get_time_diff_ms(&last_report_tm, &curr_tm);
1959             delay = (delay < PSI_POLL_PERIOD_MS) ?
1960                 PSI_POLL_PERIOD_MS - delay : PSI_POLL_PERIOD_MS;
1961 
1962             /* Wait for events until the next polling timeout */
1963             nevents = epoll_wait(epollfd, events, maxevents, delay);
1964 
1965             clock_gettime(CLOCK_MONOTONIC_COARSE, &curr_tm);
1966             if (get_time_diff_ms(&last_report_tm, &curr_tm) >= PSI_POLL_PERIOD_MS) {
1967                 polling--;
1968                 poll_handler->handler(poll_handler->data, 0);
1969                 last_report_tm = curr_tm;
1970             }
1971         } else {
1972             /* Wait for events with no timeout */
1973             nevents = epoll_wait(epollfd, events, maxevents, -1);
1974         }
1975 
1976         if (nevents == -1) {
1977             if (errno == EINTR)
1978                 continue;
1979             ALOGE("epoll_wait failed (errno=%d)", errno);
1980             continue;
1981         }
1982 
1983         /*
1984          * First pass to see if any data socket connections were dropped.
1985          * Dropped connection should be handled before any other events
1986          * to deallocate data connection and correctly handle cases when
1987          * connection gets dropped and reestablished in the same epoll cycle.
1988          * In such cases it's essential to handle connection closures first.
1989          */
1990         for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) {
1991             if ((evt->events & EPOLLHUP) && evt->data.ptr) {
1992                 ALOGI("lmkd data connection dropped");
1993                 handler_info = (struct event_handler_info*)evt->data.ptr;
1994                 ctrl_data_close(handler_info->data);
1995             }
1996         }
1997 
1998         /* Second pass to handle all other events */
1999         for (i = 0, evt = &events[0]; i < nevents; ++i, evt++) {
2000             if (evt->events & EPOLLERR)
2001                 ALOGD("EPOLLERR on event #%d", i);
2002             if (evt->events & EPOLLHUP) {
2003                 /* This case was handled in the first pass */
2004                 continue;
2005             }
2006             if (evt->data.ptr) {
2007                 handler_info = (struct event_handler_info*)evt->data.ptr;
2008                 handler_info->handler(handler_info->data, evt->events);
2009 
2010                 if (use_psi_monitors && handler_info->handler == mp_event_common) {
2011                     /*
2012                      * Poll for the duration of PSI_WINDOW_SIZE_MS after the
2013                      * initial PSI event because psi events are rate-limited
2014                      * at one per sec.
2015                      */
2016                     polling = PSI_POLL_COUNT;
2017                     poll_handler = handler_info;
2018                     clock_gettime(CLOCK_MONOTONIC_COARSE, &last_report_tm);
2019                 }
2020             }
2021         }
2022     }
2023 }
2024 
/*
 * Daemon entry point.
 *
 * Loads the tunables from system properties, creates the loggers, runs
 * init() and, when it succeeds, enters mainloop(). When the in-kernel
 * interface is not used, it first tries to lock memory and switch to
 * SCHED_FIFO; failures of either are only logged.
 *
 * The command line is ignored. The return value is always 0, including when
 * init() fails.
 */
int main(int argc __unused, char **argv __unused) {
    struct sched_param param = {
            .sched_priority = 1,
    };
    int32_t kill_timeout_prop;

    /* By default disable low level vmpressure events */
    level_oomadj[VMPRESS_LEVEL_LOW] =
        property_get_int32("ro.lmk.low", OOM_SCORE_ADJ_MAX + 1);
    level_oomadj[VMPRESS_LEVEL_MEDIUM] =
        property_get_int32("ro.lmk.medium", 800);
    level_oomadj[VMPRESS_LEVEL_CRITICAL] =
        property_get_int32("ro.lmk.critical", 0);
    debug_process_killing = property_get_bool("ro.lmk.debug", false);

    /* By default disable upgrade/downgrade logic */
    enable_pressure_upgrade =
        property_get_bool("ro.lmk.critical_upgrade", false);
    upgrade_pressure =
        (int64_t)property_get_int32("ro.lmk.upgrade_pressure", 100);
    downgrade_pressure =
        (int64_t)property_get_int32("ro.lmk.downgrade_pressure", 100);
    kill_heaviest_task =
        property_get_bool("ro.lmk.kill_heaviest_task", false);
    low_ram_device = property_get_bool("ro.config.low_ram", false);
    /*
     * A negative property value must not be cast to unsigned long directly:
     * it would wrap around to an enormous timeout. Treat it like the
     * default of 0 instead.
     */
    kill_timeout_prop = property_get_int32("ro.lmk.kill_timeout_ms", 0);
    kill_timeout_ms =
        (kill_timeout_prop > 0) ? (unsigned long)kill_timeout_prop : 0;
    use_minfree_levels =
        property_get_bool("ro.lmk.use_minfree_levels", false);
    /* per-app memcg defaults to the low-RAM setting read above */
    per_app_memcg =
        property_get_bool("ro.config.per_app_memcg", low_ram_device);
    swap_free_low_percentage =
        property_get_int32("ro.lmk.swap_free_low_percentage", 10);

    ctx = create_android_logger(MEMINFO_LOG_TAG);

#ifdef LMKD_LOG_STATS
    statslog_init(&log_ctx, &enable_stats_log);
#endif

    /* init() returns 0 on success */
    if (!init()) {
        if (!use_inkernel_interface) {
            /*
             * MCL_ONFAULT pins pages as they fault instead of loading
             * everything immediately all at once. (Which would be bad,
             * because as of this writing, we have a lot of mapped pages we
             * never use.) Old kernels will see MCL_ONFAULT and fail with
             * EINVAL; we ignore this failure.
             *
             * N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
             * pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
             * in pages.
             */
            /* CAP_IPC_LOCK required */
            if (mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
                ALOGW("mlockall failed %s", strerror(errno));
            }

            /* CAP_NICE required */
            if (sched_setscheduler(0, SCHED_FIFO, &param)) {
                ALOGW("set SCHED_FIFO failed %s", strerror(errno));
            }
        }

        mainloop();
    }

#ifdef LMKD_LOG_STATS
    statslog_destroy(&log_ctx);
#endif

    android_log_destroy(&ctx);

    ALOGI("exiting");
    return 0;
}
2100