1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "CachedAppOptimizer"
18 //#define LOG_NDEBUG 0
19 #define ATRACE_TAG ATRACE_TAG_ACTIVITY_MANAGER
20 #define ATRACE_COMPACTION_TRACK "Compaction"
21
22 #include <android-base/file.h>
23 #include <android-base/logging.h>
24 #include <android-base/stringprintf.h>
25 #include <android-base/unique_fd.h>
26 #include <android_runtime/AndroidRuntime.h>
27 #include <cutils/compiler.h>
28 #include <dirent.h>
29 #include <jni.h>
30 #include <linux/errno.h>
31 #include <linux/time.h>
32 #include <log/log.h>
33 #include <meminfo/procmeminfo.h>
34 #include <meminfo/sysmeminfo.h>
35 #include <nativehelper/JNIHelp.h>
36 #include <stddef.h>
37 #include <stdio.h>
38 #include <sys/mman.h>
39 #include <sys/pidfd.h>
40 #include <sys/stat.h>
41 #include <sys/syscall.h>
42 #include <sys/sysinfo.h>
43 #include <sys/types.h>
44 #include <unistd.h>
45 #include <utils/Timers.h>
46 #include <utils/Trace.h>
47
48 #include <algorithm>
49
50 using android::base::StringPrintf;
51 using android::base::WriteStringToFile;
52 using android::meminfo::ProcMemInfo;
53 using namespace android::meminfo;
54
// Size of a memory page in bytes, queried once during static initialization.
static const size_t kPageSize = static_cast<size_t>(getpagesize());
// Mask that clears the in-page offset bits of a value (rounds it down to a
// page boundary).
static const size_t kPageMask = ~(kPageSize - 1);

// Bit flags describing which kinds of pages a compaction request targets.
#define COMPACT_ACTION_FILE_FLAG 1
#define COMPACT_ACTION_ANON_FLAG 2
60
61 using VmaToAdviseFunc = std::function<int(const Vma&)>;
62 using android::base::unique_fd;
63
// INT_MAX rounded down to a page boundary.
#define MAX_RW_COUNT (INT_MAX & kPageMask)

// Maximum number of VMAs that can be sent per process_madvise syscall.
// This is currently UIO_MAXIOV, the maximum number of segments allowed by
// the iovec implementation used by the process_madvise syscall.
#define MAX_VMAS_PER_BATCH UIO_MAXIOV

// Maximum number of bytes that can be sent per process_madvise syscall.
// Once this limit is reached, the remaining VMAs are split into another
// syscall. The MAX_RW_COUNT limit is imposed by the iovec implementation.
// A smaller limit may be used, but it has to be a page-aligned value.
#define MAX_BYTES_PER_BATCH MAX_RW_COUNT

// Returned when a compaction is cancelled. The magnitude is large enough to
// avoid clashing with Linux errno codes.
#define ERROR_COMPACTION_CANCELLED (-1000)
79
80 namespace android {
81
// Cancellation flag for the compaction that is currently running. It is
// raised from a separate thread and makes the running compaction bail out
// before it sends its next batch of VMAs.
static std::atomic<bool> cancelRunningCompaction{false};
85
// A VmaBatch describes a set of VMAs, stored as iovec entries, that are
// waiting to be processed. Client code is expected to consume the VMAs as it
// processes them: consumed entries are discarded from the front, so the
// first element is always the next one to be sent.
struct VmaBatch {
    // Next iovec entry to be processed.
    struct iovec* vmas;
    // Number of iovec entries left between vmas and the end of the batch.
    size_t totalVmas;
    // Number of bytes remaining within those entries.
    uint64_t totalBytes;
};
98
99 // Advances the iterator by the specified amount of bytes.
100 // This is used to remove already processed or no longer
101 // needed parts of the batch.
102 // Returns total bytes consumed
consumeBytes(VmaBatch & batch,uint64_t bytesToConsume)103 uint64_t consumeBytes(VmaBatch& batch, uint64_t bytesToConsume) {
104 if (CC_UNLIKELY(bytesToConsume) < 0) {
105 LOG(ERROR) << "Cannot consume negative bytes for VMA batch !";
106 return 0;
107 }
108
109 if (CC_UNLIKELY(bytesToConsume > batch.totalBytes)) {
110 // Avoid consuming more bytes than available
111 bytesToConsume = batch.totalBytes;
112 }
113
114 uint64_t bytesConsumed = 0;
115 while (bytesConsumed < bytesToConsume) {
116 if (CC_UNLIKELY(batch.totalVmas == 0)) {
117 // No more vmas to consume
118 break;
119 }
120 if (CC_UNLIKELY(bytesConsumed + batch.vmas[0].iov_len > bytesToConsume)) {
121 // This vma can't be fully consumed, do it partially.
122 uint64_t bytesLeftToConsume = bytesToConsume - bytesConsumed;
123 bytesConsumed += bytesLeftToConsume;
124 batch.vmas[0].iov_base = (void*)((uint64_t)batch.vmas[0].iov_base + bytesLeftToConsume);
125 batch.vmas[0].iov_len -= bytesLeftToConsume;
126 batch.totalBytes -= bytesLeftToConsume;
127 return bytesConsumed;
128 }
129 // This vma can be fully consumed
130 bytesConsumed += batch.vmas[0].iov_len;
131 batch.totalBytes -= batch.vmas[0].iov_len;
132 --batch.totalVmas;
133 ++batch.vmas;
134 }
135
136 return bytesConsumed;
137 }
138
139 // given a source of vmas this class will act as a factory
140 // of VmaBatch objects and it will allow generating batches
141 // until there are no more left in the source vector.
142 // Note: the class does not actually modify the given
143 // vmas vector, instead it iterates on it until the end.
144 class VmaBatchCreator {
145 const std::vector<Vma>* sourceVmas;
146 const int totalVmasInSource;
147 // This is the destination array where batched VMAs will be stored
148 // it gets encapsulated into a VmaBatch which is the object
149 // meant to be used by client code.
150 struct iovec* destVmas;
151
152 // Parameters to keep track of the iterator on the source vmas
153 int currentIndex_;
154 uint64_t currentOffset_;
155
156 public:
VmaBatchCreator(const std::vector<Vma> * vmasToBatch,struct iovec * destVmasVec,int vmasInSource)157 VmaBatchCreator(const std::vector<Vma>* vmasToBatch, struct iovec* destVmasVec,
158 int vmasInSource)
159 : sourceVmas(vmasToBatch),
160 totalVmasInSource(vmasInSource),
161 destVmas(destVmasVec),
162 currentIndex_(0),
163 currentOffset_(0) {}
164
currentIndex()165 int currentIndex() { return currentIndex_; }
currentOffset()166 uint64_t currentOffset() { return currentOffset_; }
167
168 // Generates a batch and moves the iterator on the source vmas
169 // past the last VMA in the batch.
170 // Returns true on success, false on failure
createNextBatch(VmaBatch & batch)171 bool createNextBatch(VmaBatch& batch) {
172 if (currentIndex_ >= MAX_VMAS_PER_BATCH && currentIndex_ >= sourceVmas->size()) {
173 return false;
174 }
175
176 const std::vector<Vma>& vmas = *sourceVmas;
177 batch.vmas = destVmas;
178 uint64_t totalBytesInBatch = 0;
179 int indexInBatch = 0;
180
181 // Add VMAs to the batch up until we consumed all the VMAs or
182 // reached any imposed limit of VMAs per batch.
183 while (indexInBatch < MAX_VMAS_PER_BATCH && currentIndex_ < totalVmasInSource) {
184 uint64_t vmaStart = vmas[currentIndex_].start + currentOffset_;
185 uint64_t vmaSize = vmas[currentIndex_].end - vmaStart;
186 uint64_t bytesAvailableInBatch = MAX_BYTES_PER_BATCH - totalBytesInBatch;
187
188 batch.vmas[indexInBatch].iov_base = (void*)vmaStart;
189
190 if (vmaSize > bytesAvailableInBatch) {
191 // VMA would exceed the max available bytes in batch
192 // clamp with available bytes and finish batch.
193 vmaSize = bytesAvailableInBatch;
194 currentOffset_ += bytesAvailableInBatch;
195 }
196
197 batch.vmas[indexInBatch].iov_len = vmaSize;
198 totalBytesInBatch += vmaSize;
199
200 ++indexInBatch;
201 if (totalBytesInBatch >= MAX_BYTES_PER_BATCH) {
202 // Reached max bytes quota so this marks
203 // the end of the batch
204 if (CC_UNLIKELY(vmaSize == (vmas[currentIndex_].end - vmaStart))) {
205 // we reached max bytes exactly at the end of the vma
206 // so advance to next one
207 currentOffset_ = 0;
208 ++currentIndex_;
209 }
210 break;
211 }
212 // Fully finished current VMA, move to next one
213 currentOffset_ = 0;
214 ++currentIndex_;
215 }
216 batch.totalVmas = indexInBatch;
217 batch.totalBytes = totalBytesInBatch;
218 if (batch.totalVmas == 0 || batch.totalBytes == 0) {
219 // This is an empty batch, mark as failed creating.
220 return false;
221 }
222 return true;
223 }
224 };
225
226 // Madvise a set of VMAs given in a batch for a specific process
227 // The total number of bytes successfully madvised will be set on
228 // outBytesProcessed.
229 // Returns 0 on success and standard linux -errno code returned by
230 // process_madvise on failure
madviseVmasFromBatch(unique_fd & pidfd,VmaBatch & batch,int madviseType,uint64_t * outBytesProcessed)231 int madviseVmasFromBatch(unique_fd& pidfd, VmaBatch& batch, int madviseType,
232 uint64_t* outBytesProcessed) {
233 if (batch.totalVmas == 0 || batch.totalBytes == 0) {
234 // No VMAs in Batch, skip.
235 *outBytesProcessed = 0;
236 return 0;
237 }
238
239 ATRACE_BEGIN(StringPrintf("Madvise %d: %zu VMAs.", madviseType, batch.totalVmas).c_str());
240 int64_t bytesProcessedInSend =
241 process_madvise(pidfd, batch.vmas, batch.totalVmas, madviseType, 0);
242 ATRACE_END();
243 if (CC_UNLIKELY(bytesProcessedInSend == -1)) {
244 bytesProcessedInSend = 0;
245 if (errno != EINVAL) {
246 // Forward irrecoverable errors and bail out compaction
247 *outBytesProcessed = 0;
248 return -errno;
249 }
250 }
251 if (bytesProcessedInSend == 0) {
252 // When we find a VMA with error, fully consume it as it
253 // is extremely expensive to iterate on its pages one by one
254 bytesProcessedInSend = batch.vmas[0].iov_len;
255 } else if (bytesProcessedInSend < batch.totalBytes) {
256 // Partially processed the bytes requested
257 // skip last page which is where it failed.
258 bytesProcessedInSend += kPageSize;
259 }
260 bytesProcessedInSend = consumeBytes(batch, bytesProcessedInSend);
261
262 *outBytesProcessed = bytesProcessedInSend;
263 return 0;
264 }
265
266 // Legacy method for compacting processes, any new code should
267 // use compactProcess instead.
compactProcessProcfs(int pid,const std::string & compactionType)268 static inline void compactProcessProcfs(int pid, const std::string& compactionType) {
269 std::string reclaim_path = StringPrintf("/proc/%d/reclaim", pid);
270 WriteStringToFile(compactionType, reclaim_path);
271 }
272
273 // Compacts a set of VMAs for pid using an madviseType accepted by process_madvise syscall
274 // Returns the total bytes that where madvised.
275 //
276 // If any VMA fails compaction due to -EINVAL it will be skipped and continue.
277 // However, if it fails for any other reason, it will bail out and forward the error
compactMemory(const std::vector<Vma> & vmas,int pid,int madviseType,int totalVmas)278 static int64_t compactMemory(const std::vector<Vma>& vmas, int pid, int madviseType,
279 int totalVmas) {
280 if (totalVmas == 0) {
281 return 0;
282 }
283
284 unique_fd pidfd(pidfd_open(pid, 0));
285 if (pidfd < 0) {
286 // Skip compaction if failed to open pidfd with any error
287 return -errno;
288 }
289
290 struct iovec destVmas[MAX_VMAS_PER_BATCH];
291
292 VmaBatch batch;
293 VmaBatchCreator batcher(&vmas, destVmas, totalVmas);
294
295 int64_t totalBytesProcessed = 0;
296 while (batcher.createNextBatch(batch)) {
297 uint64_t bytesProcessedInSend;
298 ScopedTrace batchTrace(ATRACE_TAG, "VMA Batch");
299 do {
300 if (CC_UNLIKELY(cancelRunningCompaction.load())) {
301 // There could be a significant delay between when a compaction
302 // is requested and when it is handled during this time our
303 // OOM adjust could have improved.
304 LOG(DEBUG) << "Cancelled running compaction for " << pid;
305 ATRACE_INSTANT_FOR_TRACK(ATRACE_COMPACTION_TRACK,
306 StringPrintf("Cancelled compaction for %d", pid).c_str());
307 return ERROR_COMPACTION_CANCELLED;
308 }
309 int error = madviseVmasFromBatch(pidfd, batch, madviseType, &bytesProcessedInSend);
310 if (error < 0) {
311 // Returns standard linux errno code
312 return error;
313 }
314 if (CC_UNLIKELY(bytesProcessedInSend == 0)) {
315 // This means there was a problem consuming bytes,
316 // bail out since no forward progress can be made with this batch
317 break;
318 }
319 totalBytesProcessed += bytesProcessedInSend;
320 } while (batch.totalBytes > 0 && batch.totalVmas > 0);
321 }
322
323 return totalBytesProcessed;
324 }
325
getFilePageAdvice(const Vma & vma)326 static int getFilePageAdvice(const Vma& vma) {
327 if (vma.inode > 0 && !vma.is_shared) {
328 return MADV_COLD;
329 }
330 return -1;
331 }
getAnonPageAdvice(const Vma & vma)332 static int getAnonPageAdvice(const Vma& vma) {
333 bool hasReadFlag = (vma.flags & PROT_READ) > 0;
334 bool hasWriteFlag = (vma.flags & PROT_WRITE) > 0;
335 bool hasExecuteFlag = (vma.flags & PROT_EXEC) > 0;
336 if ((hasReadFlag || hasWriteFlag) && !hasExecuteFlag && !vma.is_shared) {
337 return MADV_PAGEOUT;
338 }
339 return -1;
340 }
getAnyPageAdvice(const Vma & vma)341 static int getAnyPageAdvice(const Vma& vma) {
342 if (vma.inode == 0 && !vma.is_shared) {
343 return MADV_PAGEOUT;
344 }
345 return MADV_COLD;
346 }
347
348 // Perform a full process compaction using process_madvise syscall
349 // using the madvise behavior defined by vmaToAdviseFunc per VMA.
350 //
351 // Currently supported behaviors are MADV_COLD and MADV_PAGEOUT.
352 //
353 // Returns the total number of bytes compacted on success. On error
354 // returns process_madvise errno code or if compaction was cancelled
355 // it returns ERROR_COMPACTION_CANCELLED.
356 //
357 // Not thread safe. We reuse vectors so we assume this is called only
358 // on one thread at most.
compactProcess(int pid,VmaToAdviseFunc vmaToAdviseFunc)359 static int64_t compactProcess(int pid, VmaToAdviseFunc vmaToAdviseFunc) {
360 cancelRunningCompaction.store(false);
361 static std::string mapsBuffer;
362 ATRACE_BEGIN("CollectVmas");
363 ProcMemInfo meminfo(pid);
364 static std::vector<Vma> pageoutVmas(2000), coldVmas(2000);
365 int coldVmaIndex = 0;
366 int pageoutVmaIndex = 0;
367 auto vmaCollectorCb = [&vmaToAdviseFunc, &pageoutVmaIndex, &coldVmaIndex](const Vma& vma) {
368 int advice = vmaToAdviseFunc(vma);
369 switch (advice) {
370 case MADV_COLD:
371 if (coldVmaIndex < coldVmas.size()) {
372 coldVmas[coldVmaIndex] = vma;
373 } else {
374 coldVmas.push_back(vma);
375 }
376 ++coldVmaIndex;
377 break;
378 case MADV_PAGEOUT:
379 #ifdef DEBUG_COMPACTION
380 ALOGE("Adding to compact vma=%s", vma.name.c_str());
381 #endif
382 if (pageoutVmaIndex < pageoutVmas.size()) {
383 pageoutVmas[pageoutVmaIndex] = vma;
384 } else {
385 pageoutVmas.push_back(vma);
386 }
387 ++pageoutVmaIndex;
388 break;
389 }
390 return true;
391 };
392 meminfo.ForEachVmaFromMaps(vmaCollectorCb, mapsBuffer);
393 ATRACE_END();
394 #ifdef DEBUG_COMPACTION
395 ALOGE("Total VMAs sent for compaction anon=%d file=%d", pageoutVmaIndex,
396 coldVmaIndex);
397 #endif
398
399 int64_t pageoutBytes = compactMemory(pageoutVmas, pid, MADV_PAGEOUT, pageoutVmaIndex);
400 if (pageoutBytes < 0) {
401 // Error, just forward it.
402 cancelRunningCompaction.store(false);
403 return pageoutBytes;
404 }
405
406 int64_t coldBytes = compactMemory(coldVmas, pid, MADV_COLD, coldVmaIndex);
407 if (coldBytes < 0) {
408 // Error, just forward it.
409 cancelRunningCompaction.store(false);
410 return coldBytes;
411 }
412
413 return pageoutBytes + coldBytes;
414 }
415
416 // Compact process using process_madvise syscall or fallback to procfs in
417 // case syscall does not exist.
compactProcessOrFallback(int pid,int compactionFlags)418 static void compactProcessOrFallback(int pid, int compactionFlags) {
419 if ((compactionFlags & (COMPACT_ACTION_ANON_FLAG | COMPACT_ACTION_FILE_FLAG)) == 0) return;
420
421 bool compactAnon = compactionFlags & COMPACT_ACTION_ANON_FLAG;
422 bool compactFile = compactionFlags & COMPACT_ACTION_FILE_FLAG;
423
424 // Set when the system does not support process_madvise syscall to avoid
425 // gathering VMAs in subsequent calls prior to falling back to procfs
426 static bool shouldForceProcFs = false;
427 std::string compactionType;
428 VmaToAdviseFunc vmaToAdviseFunc;
429
430 if (compactAnon) {
431 if (compactFile) {
432 compactionType = "all";
433 vmaToAdviseFunc = getAnyPageAdvice;
434 } else {
435 compactionType = "anon";
436 vmaToAdviseFunc = getAnonPageAdvice;
437 }
438 } else {
439 compactionType = "file";
440 vmaToAdviseFunc = getFilePageAdvice;
441 }
442
443 if (shouldForceProcFs || compactProcess(pid, vmaToAdviseFunc) == -ENOSYS) {
444 shouldForceProcFs = true;
445 compactProcessProcfs(pid, compactionType);
446 }
447 }
448
449 // This performs per-process reclaim on all processes belonging to non-app UIDs.
450 // For the most part, these are non-zygote processes like Treble HALs, but it
451 // also includes zygote-derived processes that run in system UIDs, like bluetooth
452 // or potentially some mainline modules. The only process that should definitely
453 // not be compacted is system_server, since compacting system_server around the
454 // time of BOOT_COMPLETE could result in perceptible issues.
com_android_server_am_CachedAppOptimizer_compactSystem(JNIEnv *,jobject)455 static void com_android_server_am_CachedAppOptimizer_compactSystem(JNIEnv *, jobject) {
456 std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
457 struct dirent* current;
458 while ((current = readdir(proc.get()))) {
459 if (current->d_type != DT_DIR) {
460 continue;
461 }
462
463 // don't compact system_server, rely on persistent compaction during screen off
464 // in order to avoid mmap_sem-related stalls
465 if (atoi(current->d_name) == getpid()) {
466 continue;
467 }
468
469 std::string status_name = StringPrintf("/proc/%s/status", current->d_name);
470 struct stat status_info;
471
472 if (stat(status_name.c_str(), &status_info) != 0) {
473 // must be some other directory that isn't a pid
474 continue;
475 }
476
477 // android.os.Process.FIRST_APPLICATION_UID
478 if (status_info.st_uid >= 10000) {
479 continue;
480 }
481
482 int pid = atoi(current->d_name);
483
484 compactProcessOrFallback(pid, COMPACT_ACTION_ANON_FLAG | COMPACT_ACTION_FILE_FLAG);
485 }
486 }
487
com_android_server_am_CachedAppOptimizer_cancelCompaction(JNIEnv *,jobject)488 static void com_android_server_am_CachedAppOptimizer_cancelCompaction(JNIEnv*, jobject) {
489 cancelRunningCompaction.store(true);
490 ATRACE_INSTANT_FOR_TRACK(ATRACE_COMPACTION_TRACK, "Cancel compaction");
491 }
492
com_android_server_am_CachedAppOptimizer_threadCpuTimeNs(JNIEnv *,jobject)493 static jlong com_android_server_am_CachedAppOptimizer_threadCpuTimeNs(JNIEnv*, jobject) {
494 int64_t currentCpuTime = systemTime(CLOCK_THREAD_CPUTIME_ID);
495
496 return currentCpuTime;
497 }
498
com_android_server_am_CachedAppOptimizer_getFreeSwapPercent(JNIEnv *,jobject)499 static jdouble com_android_server_am_CachedAppOptimizer_getFreeSwapPercent(JNIEnv*, jobject) {
500 struct sysinfo memoryInfo;
501 int error = sysinfo(&memoryInfo);
502 if(error == -1) {
503 LOG(ERROR) << "Could not check free swap space";
504 return 0;
505 }
506 return (double)memoryInfo.freeswap / (double)memoryInfo.totalswap;
507 }
508
com_android_server_am_CachedAppOptimizer_getUsedZramMemory()509 static jlong com_android_server_am_CachedAppOptimizer_getUsedZramMemory() {
510 android::meminfo::SysMemInfo sysmeminfo;
511 return sysmeminfo.mem_zram_kb();
512 }
513
com_android_server_am_CachedAppOptimizer_getMemoryFreedCompaction()514 static jlong com_android_server_am_CachedAppOptimizer_getMemoryFreedCompaction() {
515 android::meminfo::SysMemInfo sysmeminfo;
516 return sysmeminfo.mem_compacted_kb("/sys/block/zram0/");
517 }
518
com_android_server_am_CachedAppOptimizer_compactProcess(JNIEnv *,jobject,jint pid,jint compactionFlags)519 static void com_android_server_am_CachedAppOptimizer_compactProcess(JNIEnv*, jobject, jint pid,
520 jint compactionFlags) {
521 compactProcessOrFallback(pid, compactionFlags);
522 }
523
524 static const JNINativeMethod sMethods[] = {
525 /* name, signature, funcPtr */
cancelCompaction()526 {"cancelCompaction", "()V",
527 (void*)com_android_server_am_CachedAppOptimizer_cancelCompaction},
threadCpuTimeNs()528 {"threadCpuTimeNs", "()J", (void*)com_android_server_am_CachedAppOptimizer_threadCpuTimeNs},
getFreeSwapPercent()529 {"getFreeSwapPercent", "()D",
530 (void*)com_android_server_am_CachedAppOptimizer_getFreeSwapPercent},
getUsedZramMemory()531 {"getUsedZramMemory", "()J",
532 (void*)com_android_server_am_CachedAppOptimizer_getUsedZramMemory},
getMemoryFreedCompaction()533 {"getMemoryFreedCompaction", "()J",
534 (void*)com_android_server_am_CachedAppOptimizer_getMemoryFreedCompaction},
compactSystem()535 {"compactSystem", "()V", (void*)com_android_server_am_CachedAppOptimizer_compactSystem},
compactProcess(II)536 {"compactProcess", "(II)V", (void*)com_android_server_am_CachedAppOptimizer_compactProcess},
537 };
538
register_android_server_am_CachedAppOptimizer(JNIEnv * env)539 int register_android_server_am_CachedAppOptimizer(JNIEnv* env)
540 {
541 return jniRegisterNativeMethods(env, "com/android/server/am/CachedAppOptimizer",
542 sMethods, NELEM(sMethods));
543 }
544
545 }
546