1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "CachedAppOptimizer"
18 //#define LOG_NDEBUG 0
19 #define ATRACE_TAG ATRACE_TAG_ACTIVITY_MANAGER
20 #define ATRACE_COMPACTION_TRACK "Compaction"
21
22 #include <android-base/file.h>
23 #include <android-base/logging.h>
24 #include <android-base/stringprintf.h>
25 #include <android-base/unique_fd.h>
26 #include <android_runtime/AndroidRuntime.h>
27 #include <binder/IPCThreadState.h>
28 #include <cutils/compiler.h>
29 #include <dirent.h>
30 #include <jni.h>
31 #include <linux/errno.h>
32 #include <log/log.h>
33 #include <meminfo/procmeminfo.h>
34 #include <nativehelper/JNIHelp.h>
35 #include <processgroup/processgroup.h>
36 #include <stddef.h>
37 #include <stdio.h>
38 #include <sys/mman.h>
39 #include <sys/pidfd.h>
40 #include <sys/stat.h>
41 #include <sys/syscall.h>
42 #include <sys/sysinfo.h>
43 #include <sys/types.h>
44 #include <unistd.h>
45 #include <utils/Trace.h>
46
47 #include <algorithm>
48
49 using android::base::StringPrintf;
50 using android::base::WriteStringToFile;
51 using android::meminfo::ProcMemInfo;
52 using namespace android::meminfo;
53
54 #define COMPACT_ACTION_FILE_FLAG 1
55 #define COMPACT_ACTION_ANON_FLAG 2
56
57 using VmaToAdviseFunc = std::function<int(const Vma&)>;
58 using android::base::unique_fd;
59
60 #define SYNC_RECEIVED_WHILE_FROZEN (1)
61 #define ASYNC_RECEIVED_WHILE_FROZEN (2)
62 #define TXNS_PENDING_WHILE_FROZEN (4)
63
64 #define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
65
66 // Defines the maximum amount of VMAs we can send per process_madvise syscall.
67 // Currently this is set to UIO_MAXIOV which is the maximum segments allowed by
68 // iovec implementation used by process_madvise syscall
69 #define MAX_VMAS_PER_BATCH UIO_MAXIOV
70
71 // Maximum bytes that we can send per process_madvise syscall once this limit
72 // is reached we split the remaining VMAs into another syscall. The MAX_RW_COUNT
73 // limit is imposed by iovec implementation. However, if you want to use a smaller
74 // limit, it has to be a page aligned value.
75 #define MAX_BYTES_PER_BATCH MAX_RW_COUNT
76
77 // Selected a high enough number to avoid clashing with linux errno codes
78 #define ERROR_COMPACTION_CANCELLED -1000
79
80 namespace android {
81
// Cancellation flag set from a separate thread (see cancelCompaction JNI entry
// point below); an in-flight compaction checks it and bails out before
// starting its next VMA batch.
static std::atomic<bool> cancelRunningCompaction;
85
86 // A VmaBatch represents a set of VMAs that can be processed
87 // as VMAs are processed by client code it is expected that the
88 // VMAs get consumed which means they are discarded as they are
89 // processed so that the first element always is the next element
90 // to be sent
struct VmaBatch {
    // Array of address ranges; vmas[0] is always the next range to be sent.
    struct iovec* vmas;
    // total amount of VMAs to reach the end of iovec
    size_t totalVmas;
    // total amount of bytes that are remaining within iovec
    uint64_t totalBytes;
};
98
99 // Advances the iterator by the specified amount of bytes.
100 // This is used to remove already processed or no longer
101 // needed parts of the batch.
102 // Returns total bytes consumed
consumeBytes(VmaBatch & batch,uint64_t bytesToConsume)103 uint64_t consumeBytes(VmaBatch& batch, uint64_t bytesToConsume) {
104 if (CC_UNLIKELY(bytesToConsume) < 0) {
105 LOG(ERROR) << "Cannot consume negative bytes for VMA batch !";
106 return 0;
107 }
108
109 if (CC_UNLIKELY(bytesToConsume > batch.totalBytes)) {
110 // Avoid consuming more bytes than available
111 bytesToConsume = batch.totalBytes;
112 }
113
114 uint64_t bytesConsumed = 0;
115 while (bytesConsumed < bytesToConsume) {
116 if (CC_UNLIKELY(batch.totalVmas == 0)) {
117 // No more vmas to consume
118 break;
119 }
120 if (CC_UNLIKELY(bytesConsumed + batch.vmas[0].iov_len > bytesToConsume)) {
121 // This vma can't be fully consumed, do it partially.
122 uint64_t bytesLeftToConsume = bytesToConsume - bytesConsumed;
123 bytesConsumed += bytesLeftToConsume;
124 batch.vmas[0].iov_base = (void*)((uint64_t)batch.vmas[0].iov_base + bytesLeftToConsume);
125 batch.vmas[0].iov_len -= bytesLeftToConsume;
126 batch.totalBytes -= bytesLeftToConsume;
127 return bytesConsumed;
128 }
129 // This vma can be fully consumed
130 bytesConsumed += batch.vmas[0].iov_len;
131 batch.totalBytes -= batch.vmas[0].iov_len;
132 --batch.totalVmas;
133 ++batch.vmas;
134 }
135
136 return bytesConsumed;
137 }
138
139 // given a source of vmas this class will act as a factory
140 // of VmaBatch objects and it will allow generating batches
141 // until there are no more left in the source vector.
142 // Note: the class does not actually modify the given
143 // vmas vector, instead it iterates on it until the end.
144 class VmaBatchCreator {
145 const std::vector<Vma>* sourceVmas;
146 // This is the destination array where batched VMAs will be stored
147 // it gets encapsulated into a VmaBatch which is the object
148 // meant to be used by client code.
149 struct iovec* destVmas;
150
151 // Parameters to keep track of the iterator on the source vmas
152 int currentIndex_;
153 uint64_t currentOffset_;
154
155 public:
VmaBatchCreator(const std::vector<Vma> * vmasToBatch,struct iovec * destVmasVec)156 VmaBatchCreator(const std::vector<Vma>* vmasToBatch, struct iovec* destVmasVec)
157 : sourceVmas(vmasToBatch), destVmas(destVmasVec), currentIndex_(0), currentOffset_(0) {}
158
currentIndex()159 int currentIndex() { return currentIndex_; }
currentOffset()160 uint64_t currentOffset() { return currentOffset_; }
161
162 // Generates a batch and moves the iterator on the source vmas
163 // past the last VMA in the batch.
164 // Returns true on success, false on failure
createNextBatch(VmaBatch & batch)165 bool createNextBatch(VmaBatch& batch) {
166 if (currentIndex_ >= MAX_VMAS_PER_BATCH && currentIndex_ >= sourceVmas->size()) {
167 return false;
168 }
169
170 const std::vector<Vma>& vmas = *sourceVmas;
171 batch.vmas = destVmas;
172 uint64_t totalBytesInBatch = 0;
173 int indexInBatch = 0;
174
175 // Add VMAs to the batch up until we consumed all the VMAs or
176 // reached any imposed limit of VMAs per batch.
177 while (indexInBatch < MAX_VMAS_PER_BATCH && currentIndex_ < vmas.size()) {
178 uint64_t vmaStart = vmas[currentIndex_].start + currentOffset_;
179 uint64_t vmaSize = vmas[currentIndex_].end - vmaStart;
180 uint64_t bytesAvailableInBatch = MAX_BYTES_PER_BATCH - totalBytesInBatch;
181
182 batch.vmas[indexInBatch].iov_base = (void*)vmaStart;
183
184 if (vmaSize > bytesAvailableInBatch) {
185 // VMA would exceed the max available bytes in batch
186 // clamp with available bytes and finish batch.
187 vmaSize = bytesAvailableInBatch;
188 currentOffset_ += bytesAvailableInBatch;
189 }
190
191 batch.vmas[indexInBatch].iov_len = vmaSize;
192 totalBytesInBatch += vmaSize;
193
194 ++indexInBatch;
195 if (totalBytesInBatch >= MAX_BYTES_PER_BATCH) {
196 // Reached max bytes quota so this marks
197 // the end of the batch
198 if (CC_UNLIKELY(vmaSize == (vmas[currentIndex_].end - vmaStart))) {
199 // we reached max bytes exactly at the end of the vma
200 // so advance to next one
201 currentOffset_ = 0;
202 ++currentIndex_;
203 }
204 break;
205 }
206 // Fully finished current VMA, move to next one
207 currentOffset_ = 0;
208 ++currentIndex_;
209 }
210 batch.totalVmas = indexInBatch;
211 batch.totalBytes = totalBytesInBatch;
212 if (batch.totalVmas == 0 || batch.totalBytes == 0) {
213 // This is an empty batch, mark as failed creating.
214 return false;
215 }
216 return true;
217 }
218 };
219
220 // Madvise a set of VMAs given in a batch for a specific process
221 // The total number of bytes successfully madvised will be set on
222 // outBytesProcessed.
223 // Returns 0 on success and standard linux -errno code returned by
224 // process_madvise on failure
int madviseVmasFromBatch(unique_fd& pidfd, VmaBatch& batch, int madviseType,
                         uint64_t* outBytesProcessed) {
    if (batch.totalVmas == 0 || batch.totalBytes == 0) {
        // No VMAs in Batch, skip.
        *outBytesProcessed = 0;
        return 0;
    }

    ATRACE_BEGIN(StringPrintf("Madvise %d: %zu VMAs.", madviseType, batch.totalVmas).c_str());
    // process_madvise returns the number of bytes advised, which may be less
    // than requested if it failed partway through the iovec, or -1 with errno.
    int64_t bytesProcessedInSend =
            process_madvise(pidfd, batch.vmas, batch.totalVmas, madviseType, 0);
    ATRACE_END();
    if (CC_UNLIKELY(bytesProcessedInSend == -1)) {
        bytesProcessedInSend = 0;
        if (errno != EINVAL) {
            // Forward irrecoverable errors and bail out compaction
            *outBytesProcessed = 0;
            return -errno;
        }
        // EINVAL is treated as a per-VMA problem: fall through with zero
        // progress so the offending VMA gets skipped below.
    }
    if (bytesProcessedInSend == 0) {
        // When we find a VMA with error, fully consume it as it
        // is extremely expensive to iterate on its pages one by one
        bytesProcessedInSend = batch.vmas[0].iov_len;
    } else if (bytesProcessedInSend < batch.totalBytes) {
        // Partially processed the bytes requested
        // skip last page which is where it failed.
        bytesProcessedInSend += PAGE_SIZE;
    }
    // Drop everything handled (or deliberately skipped) from the front of the
    // batch so the caller can retry with the remainder.
    bytesProcessedInSend = consumeBytes(batch, bytesProcessedInSend);

    *outBytesProcessed = bytesProcessedInSend;
    return 0;
}
259
260 // Legacy method for compacting processes, any new code should
261 // use compactProcess instead.
compactProcessProcfs(int pid,const std::string & compactionType)262 static inline void compactProcessProcfs(int pid, const std::string& compactionType) {
263 std::string reclaim_path = StringPrintf("/proc/%d/reclaim", pid);
264 WriteStringToFile(compactionType, reclaim_path);
265 }
266
267 // Compacts a set of VMAs for pid using an madviseType accepted by process_madvise syscall
268 // Returns the total bytes that where madvised.
269 //
270 // If any VMA fails compaction due to -EINVAL it will be skipped and continue.
271 // However, if it fails for any other reason, it will bail out and forward the error
static int64_t compactMemory(const std::vector<Vma>& vmas, int pid, int madviseType) {
    if (vmas.empty()) {
        return 0;
    }

    unique_fd pidfd(pidfd_open(pid, 0));
    if (pidfd < 0) {
        // Skip compaction if failed to open pidfd with any error
        return -errno;
    }

    // Scratch iovec storage reused by every batch (MAX_VMAS_PER_BATCH == UIO_MAXIOV).
    struct iovec destVmas[MAX_VMAS_PER_BATCH];

    VmaBatch batch;
    VmaBatchCreator batcher(&vmas, destVmas);

    int64_t totalBytesProcessed = 0;
    while (batcher.createNextBatch(batch)) {
        uint64_t bytesProcessedInSend;
        ScopedTrace batchTrace(ATRACE_TAG, "VMA Batch");
        // A single batch may need several process_madvise calls (partial
        // progress or skipped VMAs), so keep sending until it is drained.
        do {
            if (CC_UNLIKELY(cancelRunningCompaction.load())) {
                // There could be a significant delay between when a compaction
                // is requested and when it is handled during this time our
                // OOM adjust could have improved.
                LOG(DEBUG) << "Cancelled running compaction for " << pid;
                ATRACE_INSTANT_FOR_TRACK(ATRACE_COMPACTION_TRACK,
                                         StringPrintf("Cancelled compaction for %d", pid).c_str());
                return ERROR_COMPACTION_CANCELLED;
            }
            int error = madviseVmasFromBatch(pidfd, batch, madviseType, &bytesProcessedInSend);
            if (error < 0) {
                // Returns standard linux errno code
                return error;
            }
            if (CC_UNLIKELY(bytesProcessedInSend == 0)) {
                // This means there was a problem consuming bytes,
                // bail out since no forward progress can be made with this batch
                break;
            }
            totalBytesProcessed += bytesProcessedInSend;
        } while (batch.totalBytes > 0 && batch.totalVmas > 0);
    }

    return totalBytesProcessed;
}
318
getFilePageAdvice(const Vma & vma)319 static int getFilePageAdvice(const Vma& vma) {
320 if (vma.inode > 0 && !vma.is_shared) {
321 return MADV_COLD;
322 }
323 return -1;
324 }
getAnonPageAdvice(const Vma & vma)325 static int getAnonPageAdvice(const Vma& vma) {
326 if (vma.inode == 0 && !vma.is_shared) {
327 return MADV_PAGEOUT;
328 }
329 return -1;
330 }
getAnyPageAdvice(const Vma & vma)331 static int getAnyPageAdvice(const Vma& vma) {
332 if (vma.inode == 0 && !vma.is_shared) {
333 return MADV_PAGEOUT;
334 }
335 return MADV_COLD;
336 }
337
338 // Perform a full process compaction using process_madvise syscall
339 // using the madvise behavior defined by vmaToAdviseFunc per VMA.
340 //
341 // Currently supported behaviors are MADV_COLD and MADV_PAGEOUT.
342 //
343 // Returns the total number of bytes compacted on success. On error
344 // returns process_madvise errno code or if compaction was cancelled
345 // it returns ERROR_COMPACTION_CANCELLED.
compactProcess(int pid,VmaToAdviseFunc vmaToAdviseFunc)346 static int64_t compactProcess(int pid, VmaToAdviseFunc vmaToAdviseFunc) {
347 cancelRunningCompaction.store(false);
348
349 ATRACE_BEGIN("CollectVmas");
350 ProcMemInfo meminfo(pid);
351 std::vector<Vma> pageoutVmas, coldVmas;
352 auto vmaCollectorCb = [&coldVmas,&pageoutVmas,&vmaToAdviseFunc](const Vma& vma) {
353 int advice = vmaToAdviseFunc(vma);
354 switch (advice) {
355 case MADV_COLD:
356 coldVmas.push_back(vma);
357 break;
358 case MADV_PAGEOUT:
359 pageoutVmas.push_back(vma);
360 break;
361 }
362 };
363 meminfo.ForEachVmaFromMaps(vmaCollectorCb);
364 ATRACE_END();
365
366 int64_t pageoutBytes = compactMemory(pageoutVmas, pid, MADV_PAGEOUT);
367 if (pageoutBytes < 0) {
368 // Error, just forward it.
369 cancelRunningCompaction.store(false);
370 return pageoutBytes;
371 }
372
373 int64_t coldBytes = compactMemory(coldVmas, pid, MADV_COLD);
374 if (coldBytes < 0) {
375 // Error, just forward it.
376 cancelRunningCompaction.store(false);
377 return coldBytes;
378 }
379
380 return pageoutBytes + coldBytes;
381 }
382
383 // Compact process using process_madvise syscall or fallback to procfs in
384 // case syscall does not exist.
compactProcessOrFallback(int pid,int compactionFlags)385 static void compactProcessOrFallback(int pid, int compactionFlags) {
386 if ((compactionFlags & (COMPACT_ACTION_ANON_FLAG | COMPACT_ACTION_FILE_FLAG)) == 0) return;
387
388 bool compactAnon = compactionFlags & COMPACT_ACTION_ANON_FLAG;
389 bool compactFile = compactionFlags & COMPACT_ACTION_FILE_FLAG;
390
391 // Set when the system does not support process_madvise syscall to avoid
392 // gathering VMAs in subsequent calls prior to falling back to procfs
393 static bool shouldForceProcFs = false;
394 std::string compactionType;
395 VmaToAdviseFunc vmaToAdviseFunc;
396
397 if (compactAnon) {
398 if (compactFile) {
399 compactionType = "all";
400 vmaToAdviseFunc = getAnyPageAdvice;
401 } else {
402 compactionType = "anon";
403 vmaToAdviseFunc = getAnonPageAdvice;
404 }
405 } else {
406 compactionType = "file";
407 vmaToAdviseFunc = getFilePageAdvice;
408 }
409
410 if (shouldForceProcFs || compactProcess(pid, vmaToAdviseFunc) == -ENOSYS) {
411 shouldForceProcFs = true;
412 compactProcessProcfs(pid, compactionType);
413 }
414 }
415
416 // This performs per-process reclaim on all processes belonging to non-app UIDs.
417 // For the most part, these are non-zygote processes like Treble HALs, but it
418 // also includes zygote-derived processes that run in system UIDs, like bluetooth
419 // or potentially some mainline modules. The only process that should definitely
420 // not be compacted is system_server, since compacting system_server around the
421 // time of BOOT_COMPLETE could result in perceptible issues.
com_android_server_am_CachedAppOptimizer_compactSystem(JNIEnv *,jobject)422 static void com_android_server_am_CachedAppOptimizer_compactSystem(JNIEnv *, jobject) {
423 std::unique_ptr<DIR, decltype(&closedir)> proc(opendir("/proc"), closedir);
424 struct dirent* current;
425 while ((current = readdir(proc.get()))) {
426 if (current->d_type != DT_DIR) {
427 continue;
428 }
429
430 // don't compact system_server, rely on persistent compaction during screen off
431 // in order to avoid mmap_sem-related stalls
432 if (atoi(current->d_name) == getpid()) {
433 continue;
434 }
435
436 std::string status_name = StringPrintf("/proc/%s/status", current->d_name);
437 struct stat status_info;
438
439 if (stat(status_name.c_str(), &status_info) != 0) {
440 // must be some other directory that isn't a pid
441 continue;
442 }
443
444 // android.os.Process.FIRST_APPLICATION_UID
445 if (status_info.st_uid >= 10000) {
446 continue;
447 }
448
449 int pid = atoi(current->d_name);
450
451 compactProcessOrFallback(pid, COMPACT_ACTION_ANON_FLAG | COMPACT_ACTION_FILE_FLAG);
452 }
453 }
454
com_android_server_am_CachedAppOptimizer_cancelCompaction(JNIEnv *,jobject)455 static void com_android_server_am_CachedAppOptimizer_cancelCompaction(JNIEnv*, jobject) {
456 cancelRunningCompaction.store(true);
457 ATRACE_INSTANT_FOR_TRACK(ATRACE_COMPACTION_TRACK, "Cancel compaction");
458 }
459
com_android_server_am_CachedAppOptimizer_getFreeSwapPercent(JNIEnv *,jobject)460 static jdouble com_android_server_am_CachedAppOptimizer_getFreeSwapPercent(JNIEnv*, jobject) {
461 struct sysinfo memoryInfo;
462 int error = sysinfo(&memoryInfo);
463 if(error == -1) {
464 LOG(ERROR) << "Could not check free swap space";
465 return 0;
466 }
467 return (double)memoryInfo.freeswap / (double)memoryInfo.totalswap;
468 }
469
com_android_server_am_CachedAppOptimizer_compactProcess(JNIEnv *,jobject,jint pid,jint compactionFlags)470 static void com_android_server_am_CachedAppOptimizer_compactProcess(JNIEnv*, jobject, jint pid,
471 jint compactionFlags) {
472 compactProcessOrFallback(pid, compactionFlags);
473 }
474
com_android_server_am_CachedAppOptimizer_freezeBinder(JNIEnv * env,jobject clazz,jint pid,jboolean freeze)475 static jint com_android_server_am_CachedAppOptimizer_freezeBinder(
476 JNIEnv *env, jobject clazz, jint pid, jboolean freeze) {
477
478 jint retVal = IPCThreadState::freeze(pid, freeze, 100 /* timeout [ms] */);
479 if (retVal != 0 && retVal != -EAGAIN) {
480 jniThrowException(env, "java/lang/RuntimeException", "Unable to freeze/unfreeze binder");
481 }
482
483 return retVal;
484 }
485
com_android_server_am_CachedAppOptimizer_getBinderFreezeInfo(JNIEnv * env,jobject clazz,jint pid)486 static jint com_android_server_am_CachedAppOptimizer_getBinderFreezeInfo(JNIEnv *env,
487 jobject clazz, jint pid) {
488 uint32_t syncReceived = 0, asyncReceived = 0;
489
490 int error = IPCThreadState::getProcessFreezeInfo(pid, &syncReceived, &asyncReceived);
491
492 if (error < 0) {
493 jniThrowException(env, "java/lang/RuntimeException", strerror(error));
494 }
495
496 jint retVal = 0;
497
498 // bit 0 of sync_recv goes to bit 0 of retVal
499 retVal |= syncReceived & SYNC_RECEIVED_WHILE_FROZEN;
500 // bit 0 of async_recv goes to bit 1 of retVal
501 retVal |= (asyncReceived << 1) & ASYNC_RECEIVED_WHILE_FROZEN;
502 // bit 1 of sync_recv goes to bit 2 of retVal
503 retVal |= (syncReceived << 1) & TXNS_PENDING_WHILE_FROZEN;
504
505 return retVal;
506 }
507
com_android_server_am_CachedAppOptimizer_getFreezerCheckPath(JNIEnv * env,jobject clazz)508 static jstring com_android_server_am_CachedAppOptimizer_getFreezerCheckPath(JNIEnv* env,
509 jobject clazz) {
510 std::string path;
511
512 if (!getAttributePathForTask("FreezerState", getpid(), &path)) {
513 path = "";
514 }
515
516 return env->NewStringUTF(path.c_str());
517 }
518
// JNI dispatch table mapping Java-side native method names/signatures in
// com.android.server.am.CachedAppOptimizer to the implementations above.
static const JNINativeMethod sMethods[] = {
        /* name, signature, funcPtr */
        {"cancelCompaction", "()V",
         (void*)com_android_server_am_CachedAppOptimizer_cancelCompaction},
        {"getFreeSwapPercent", "()D",
         (void*)com_android_server_am_CachedAppOptimizer_getFreeSwapPercent},
        {"compactSystem", "()V", (void*)com_android_server_am_CachedAppOptimizer_compactSystem},
        {"compactProcess", "(II)V", (void*)com_android_server_am_CachedAppOptimizer_compactProcess},
        {"freezeBinder", "(IZ)I", (void*)com_android_server_am_CachedAppOptimizer_freezeBinder},
        {"getBinderFreezeInfo", "(I)I",
         (void*)com_android_server_am_CachedAppOptimizer_getBinderFreezeInfo},
        {"getFreezerCheckPath", "()Ljava/lang/String;",
         (void*)com_android_server_am_CachedAppOptimizer_getFreezerCheckPath}};
532
register_android_server_am_CachedAppOptimizer(JNIEnv * env)533 int register_android_server_am_CachedAppOptimizer(JNIEnv* env)
534 {
535 return jniRegisterNativeMethods(env, "com/android/server/am/CachedAppOptimizer",
536 sMethods, NELEM(sMethods));
537 }
538
539 }
540