1 /*
2 * Copyright (C) 2012-2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 // for manual checking of stale entries during ChattyLogBuffer::erase()
17 //#define DEBUG_CHECK_FOR_STALE_ENTRIES
18
19 #include "ChattyLogBuffer.h"
20
21 #include <ctype.h>
22 #include <endian.h>
23 #include <errno.h>
24 #include <stdio.h>
25 #include <string.h>
26 #include <sys/cdefs.h>
27 #include <sys/user.h>
28 #include <time.h>
29 #include <unistd.h>
30
31 #include <limits>
32 #include <unordered_map>
33 #include <utility>
34
35 #include <private/android_logger.h>
36
37 #include "LogUtils.h"
38
39 #ifndef __predict_false
40 #define __predict_false(exp) __builtin_expect((exp) != 0, 0)
41 #endif
42
ChattyLogBuffer(LogReaderList * reader_list,LogTags * tags,PruneList * prune,LogStatistics * stats)43 ChattyLogBuffer::ChattyLogBuffer(LogReaderList* reader_list, LogTags* tags, PruneList* prune,
44 LogStatistics* stats)
45 : SimpleLogBuffer(reader_list, tags, stats), prune_(prune) {}
46
~ChattyLogBuffer()47 ChattyLogBuffer::~ChattyLogBuffer() {}
48
// Result of comparing a new log element against the previously logged one.
enum match_type {
    DIFFERENT,    // not a repeat; both elements are kept
    SAME,         // a repeat of the previous element
    SAME_LIBLOG,  // a repeat of a liblog tagged event whose content gets summed
};
50
Identical(const LogBufferElement & elem,const LogBufferElement & last)51 static enum match_type Identical(const LogBufferElement& elem, const LogBufferElement& last) {
52 ssize_t lenl = elem.msg_len();
53 if (lenl <= 0) return DIFFERENT; // value if this represents a chatty elem
54 ssize_t lenr = last.msg_len();
55 if (lenr <= 0) return DIFFERENT; // value if this represents a chatty elem
56 if (elem.uid() != last.uid()) return DIFFERENT;
57 if (elem.pid() != last.pid()) return DIFFERENT;
58 if (elem.tid() != last.tid()) return DIFFERENT;
59
60 // last is more than a minute old, stop squashing identical messages
61 if (elem.realtime().nsec() > (last.realtime().nsec() + 60 * NS_PER_SEC)) return DIFFERENT;
62
63 // Identical message
64 const char* msgl = elem.msg();
65 const char* msgr = last.msg();
66 if (lenl == lenr) {
67 if (!fastcmp<memcmp>(msgl, msgr, lenl)) return SAME;
68 // liblog tagged messages (content gets summed)
69 if (elem.log_id() == LOG_ID_EVENTS && lenl == sizeof(android_log_event_int_t) &&
70 !fastcmp<memcmp>(msgl, msgr, sizeof(android_log_event_int_t) - sizeof(int32_t)) &&
71 elem.GetTag() == LIBLOG_LOG_TAG) {
72 return SAME_LIBLOG;
73 }
74 }
75
76 // audit message (except sequence number) identical?
77 if (IsBinary(last.log_id()) &&
78 lenl > static_cast<ssize_t>(sizeof(android_log_event_string_t)) &&
79 lenr > static_cast<ssize_t>(sizeof(android_log_event_string_t))) {
80 if (fastcmp<memcmp>(msgl, msgr, sizeof(android_log_event_string_t) - sizeof(int32_t))) {
81 return DIFFERENT;
82 }
83 msgl += sizeof(android_log_event_string_t);
84 lenl -= sizeof(android_log_event_string_t);
85 msgr += sizeof(android_log_event_string_t);
86 lenr -= sizeof(android_log_event_string_t);
87 }
88 static const char avc[] = "): avc: ";
89 const char* avcl = android::strnstr(msgl, lenl, avc);
90 if (!avcl) return DIFFERENT;
91 lenl -= avcl - msgl;
92 const char* avcr = android::strnstr(msgr, lenr, avc);
93 if (!avcr) return DIFFERENT;
94 lenr -= avcr - msgr;
95 if (lenl != lenr) return DIFFERENT;
96 if (fastcmp<memcmp>(avcl + strlen(avc), avcr + strlen(avc), lenl - strlen(avc))) {
97 return DIFFERENT;
98 }
99 return SAME;
100 }
101
LogInternal(LogBufferElement && elem)102 void ChattyLogBuffer::LogInternal(LogBufferElement&& elem) {
103 // b/137093665: don't coalesce security messages.
104 if (elem.log_id() == LOG_ID_SECURITY) {
105 SimpleLogBuffer::LogInternal(std::move(elem));
106 return;
107 }
108 int log_id = elem.log_id();
109
110 // Initialize last_logged_elements_ to a copy of elem if logging the first element for a log_id.
111 if (!last_logged_elements_[log_id]) {
112 last_logged_elements_[log_id].emplace(elem);
113 SimpleLogBuffer::LogInternal(std::move(elem));
114 return;
115 }
116
117 LogBufferElement& current_last = *last_logged_elements_[log_id];
118 enum match_type match = Identical(elem, current_last);
119
120 if (match == DIFFERENT) {
121 if (duplicate_elements_[log_id]) {
122 // If we previously had 3+ identical messages, log the chatty message.
123 if (duplicate_elements_[log_id]->dropped_count() > 0) {
124 SimpleLogBuffer::LogInternal(std::move(*duplicate_elements_[log_id]));
125 }
126 duplicate_elements_[log_id].reset();
127 // Log the saved copy of the last identical message seen.
128 SimpleLogBuffer::LogInternal(std::move(current_last));
129 }
130 last_logged_elements_[log_id].emplace(elem);
131 SimpleLogBuffer::LogInternal(std::move(elem));
132 return;
133 }
134
135 // 2 identical message: set duplicate_elements_ appropriately.
136 if (!duplicate_elements_[log_id]) {
137 duplicate_elements_[log_id].emplace(std::move(current_last));
138 last_logged_elements_[log_id].emplace(std::move(elem));
139 return;
140 }
141
142 // 3+ identical LIBLOG event messages: coalesce them into last_logged_elements_.
143 if (match == SAME_LIBLOG) {
144 const android_log_event_int_t* current_last_event =
145 reinterpret_cast<const android_log_event_int_t*>(current_last.msg());
146 int64_t current_last_count = current_last_event->payload.data;
147 android_log_event_int_t* elem_event =
148 reinterpret_cast<android_log_event_int_t*>(const_cast<char*>(elem.msg()));
149 int64_t elem_count = elem_event->payload.data;
150
151 int64_t total = current_last_count + elem_count;
152 if (total > std::numeric_limits<int32_t>::max()) {
153 SimpleLogBuffer::LogInternal(std::move(current_last));
154 last_logged_elements_[log_id].emplace(std::move(elem));
155 return;
156 }
157 stats()->AddTotal(current_last.log_id(), current_last.msg_len());
158 elem_event->payload.data = total;
159 last_logged_elements_[log_id].emplace(std::move(elem));
160 return;
161 }
162
163 // 3+ identical messages (not LIBLOG) messages: increase the drop count.
164 uint16_t dropped_count = duplicate_elements_[log_id]->dropped_count();
165 if (dropped_count == std::numeric_limits<uint16_t>::max()) {
166 SimpleLogBuffer::LogInternal(std::move(*duplicate_elements_[log_id]));
167 dropped_count = 0;
168 }
169 // We're dropping the current_last log so add its stats to the total.
170 stats()->AddTotal(current_last.log_id(), current_last.msg_len());
171 // Use current_last for tracking the dropped count to always use the latest timestamp.
172 current_last.SetDropped(dropped_count + 1);
173 duplicate_elements_[log_id].emplace(std::move(current_last));
174 last_logged_elements_[log_id].emplace(std::move(elem));
175 }
176
Erase(LogBufferElementCollection::iterator it,bool coalesce)177 LogBufferElementCollection::iterator ChattyLogBuffer::Erase(LogBufferElementCollection::iterator it,
178 bool coalesce) {
179 LogBufferElement& element = *it;
180 log_id_t id = element.log_id();
181
182 // Remove iterator references in the various lists that will become stale
183 // after the element is erased from the main logging list.
184
185 { // start of scope for found iterator
186 int key = (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) ? element.GetTag() : element.uid();
187 LogBufferIteratorMap::iterator found = mLastWorst[id].find(key);
188 if ((found != mLastWorst[id].end()) && (it == found->second)) {
189 mLastWorst[id].erase(found);
190 }
191 }
192
193 { // start of scope for pid found iterator
194 // element->uid() may not be AID_SYSTEM for next-best-watermark.
195 // will not assume id != LOG_ID_EVENTS or LOG_ID_SECURITY for KISS and
196 // long term code stability, find() check should be fast for those ids.
197 LogBufferPidIteratorMap::iterator found = mLastWorstPidOfSystem[id].find(element.pid());
198 if (found != mLastWorstPidOfSystem[id].end() && it == found->second) {
199 mLastWorstPidOfSystem[id].erase(found);
200 }
201 }
202
203 #ifdef DEBUG_CHECK_FOR_STALE_ENTRIES
204 LogBufferElementCollection::iterator bad = it;
205 int key = (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) ? element->GetTag() : element->uid();
206 #endif
207
208 if (coalesce) {
209 stats()->Erase(element.ToLogStatisticsElement());
210 } else {
211 stats()->Subtract(element.ToLogStatisticsElement());
212 }
213
214 it = SimpleLogBuffer::Erase(it);
215
216 #ifdef DEBUG_CHECK_FOR_STALE_ENTRIES
217 log_id_for_each(i) {
218 for (auto b : mLastWorst[i]) {
219 if (bad == b.second) {
220 LOG(ERROR) << StringPrintf("stale mLastWorst[%d] key=%d mykey=%d", i, b.first, key);
221 }
222 }
223 for (auto b : mLastWorstPidOfSystem[i]) {
224 if (bad == b.second) {
225 LOG(ERROR) << StringPrintf("stale mLastWorstPidOfSystem[%d] pid=%d", i, b.first);
226 }
227 }
228 }
229 #endif
230 return it;
231 }
232
233 // Define a temporary mechanism to report the last LogBufferElement pointer
234 // for the specified uid, pid and tid. Used below to help merge-sort when
235 // pruning for worst UID.
236 class LogBufferElementLast {
237 typedef std::unordered_map<uint64_t, LogBufferElement*> LogBufferElementMap;
238 LogBufferElementMap map;
239
240 public:
coalesce(LogBufferElement * element,uint16_t dropped)241 bool coalesce(LogBufferElement* element, uint16_t dropped) {
242 uint64_t key = LogBufferElementKey(element->uid(), element->pid(), element->tid());
243 LogBufferElementMap::iterator it = map.find(key);
244 if (it != map.end()) {
245 LogBufferElement* found = it->second;
246 uint16_t moreDropped = found->dropped_count();
247 if ((dropped + moreDropped) > USHRT_MAX) {
248 map.erase(it);
249 } else {
250 found->SetDropped(dropped + moreDropped);
251 return true;
252 }
253 }
254 return false;
255 }
256
add(LogBufferElement * element)257 void add(LogBufferElement* element) {
258 uint64_t key = LogBufferElementKey(element->uid(), element->pid(), element->tid());
259 map[key] = element;
260 }
261
clear()262 void clear() { map.clear(); }
263
clear(LogBufferElement * element)264 void clear(LogBufferElement* element) {
265 uint64_t current = element->realtime().nsec() - (EXPIRE_RATELIMIT * NS_PER_SEC);
266 for (LogBufferElementMap::iterator it = map.begin(); it != map.end();) {
267 LogBufferElement* mapElement = it->second;
268 if (mapElement->dropped_count() >= EXPIRE_THRESHOLD &&
269 current > mapElement->realtime().nsec()) {
270 it = map.erase(it);
271 } else {
272 ++it;
273 }
274 }
275 }
276
277 private:
LogBufferElementKey(uid_t uid,pid_t pid,pid_t tid)278 uint64_t LogBufferElementKey(uid_t uid, pid_t pid, pid_t tid) {
279 return uint64_t(uid) << 32 | uint64_t(pid) << 16 | uint64_t(tid);
280 }
281 };
282
283 // prune "pruneRows" of type "id" from the buffer.
284 //
285 // This garbage collection task is used to expire log entries. It is called to
286 // remove all logs (clear), all UID logs (unprivileged clear), or every
287 // 256 or 10% of the total logs (whichever is less) to prune the logs.
288 //
289 // First there is a prep phase where we discover the reader region lock that
290 // acts as a backstop to any pruning activity to stop there and go no further.
291 //
292 // There are three major pruning loops that follow. All expire from the oldest
293 // entries. Since there are multiple log buffers, the Android logging facility
294 // will appear to drop entries 'in the middle' when looking at multiple log
295 // sources and buffers. This effect is slightly more prominent when we prune
296 // the worst offender by logging source. Thus the logs slowly loose content
297 // and value as you move back in time. This is preferred since chatty sources
298 // invariably move the logs value down faster as less chatty sources would be
299 // expired in the noise.
300 //
301 // The first pass prunes elements that match 3 possible rules:
302 // 1) A high priority prune rule, for example ~100/20, which indicates elements from UID 100 and PID
303 // 20 should be pruned in this first pass.
304 // 2) The default chatty pruning rule, ~!. This rule sums the total size spent on log messages for
305 // each UID this log buffer. If the highest sum consumes more than 12.5% of the log buffer, then
306 // these elements from that UID are pruned.
307 // 3) The default AID_SYSTEM pruning rule, ~1000/!. This rule is a special case to 2), if
308 // AID_SYSTEM is the top consumer of the log buffer, then this rule sums the total size spent on
309 // log messages for each PID in AID_SYSTEM in this log buffer and prunes elements from the PID
310 // with the highest sum.
311 // This pass reevaluates the sums for rules 2) and 3) for every log message pruned. It creates
312 // 'chatty' entries for the elements that it prunes and merges related chatty entries together. It
313 // completes when one of three conditions have been met:
314 // 1) The requested element count has been pruned.
315 // 2) There are no elements that match any of these rules.
316 // 3) A reader is referencing the oldest element that would match these rules.
317 //
318 // The second pass prunes elements starting from the beginning of the log. It skips elements that
319 // match any low priority prune rules. It completes when one of three conditions have been met:
320 // 1) The requested element count has been pruned.
321 // 2) All elements except those mwatching low priority prune rules have been pruned.
322 // 3) A reader is referencing the oldest element that would match these rules.
323 //
324 // The final pass only happens if there are any low priority prune rules and if the first two passes
325 // were unable to prune the requested number of elements. It prunes elements all starting from the
326 // beginning of the log, regardless of if they match any low priority prune rules.
327 //
328 // If the requested number of logs was unable to be pruned, KickReader() is called to mitigate the
329 // situation before the next call to Prune() and the function returns false. Otherwise, if the
330 // requested number of logs or all logs present in the buffer are pruned, in the case of Clear(),
331 // it returns true.
Prune(log_id_t id,unsigned long pruneRows,uid_t caller_uid)332 bool ChattyLogBuffer::Prune(log_id_t id, unsigned long pruneRows, uid_t caller_uid) {
333 LogReaderThread* oldest = nullptr;
334 bool clearAll = pruneRows == ULONG_MAX;
335
336 // Region locked?
337 for (const auto& reader_thread : reader_list()->reader_threads()) {
338 if (!reader_thread->IsWatching(id)) {
339 continue;
340 }
341 if (!oldest || oldest->start() > reader_thread->start() ||
342 (oldest->start() == reader_thread->start() &&
343 reader_thread->deadline().time_since_epoch().count() != 0)) {
344 oldest = reader_thread.get();
345 }
346 }
347
348 LogBufferElementCollection::iterator it;
349
350 if (__predict_false(caller_uid != AID_ROOT)) { // unlikely
351 // Only here if clear all request from non system source, so chatty
352 // filter logistics is not required.
353 it = GetOldest(id);
354 while (it != logs().end()) {
355 LogBufferElement& element = *it;
356
357 if (element.log_id() != id || element.uid() != caller_uid) {
358 ++it;
359 continue;
360 }
361
362 if (oldest && oldest->start() <= element.sequence()) {
363 KickReader(oldest, id, pruneRows);
364 return false;
365 }
366
367 it = Erase(it);
368 if (--pruneRows == 0) {
369 return true;
370 }
371 }
372 return true;
373 }
374
375 // First prune pass.
376 bool check_high_priority = id != LOG_ID_SECURITY && prune_->HasHighPriorityPruneRules();
377 while (!clearAll && (pruneRows > 0)) {
378 // recalculate the worst offender on every batched pass
379 int worst = -1; // not valid for uid() or getKey()
380 size_t worst_sizes = 0;
381 size_t second_worst_sizes = 0;
382 pid_t worstPid = 0; // POSIX guarantees PID != 0
383
384 if (worstUidEnabledForLogid(id) && prune_->worst_uid_enabled()) {
385 // Calculate threshold as 12.5% of available storage
386 size_t threshold = max_size(id) / 8;
387
388 if (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) {
389 stats()->WorstTwoTags(threshold, &worst, &worst_sizes, &second_worst_sizes);
390 // per-pid filter for AID_SYSTEM sources is too complex
391 } else {
392 stats()->WorstTwoUids(id, threshold, &worst, &worst_sizes, &second_worst_sizes);
393
394 if (worst == AID_SYSTEM && prune_->worst_pid_of_system_enabled()) {
395 stats()->WorstTwoSystemPids(id, worst_sizes, &worstPid, &second_worst_sizes);
396 }
397 }
398 }
399
400 // skip if we have neither a worst UID or high priority prune rules
401 if (worst == -1 && !check_high_priority) {
402 break;
403 }
404
405 bool kick = false;
406 bool leading = true; // true if starting from the oldest log entry, false if starting from
407 // a specific chatty entry.
408 // Perform at least one mandatory garbage collection cycle in following
409 // - clear leading chatty tags
410 // - coalesce chatty tags
411 // - check age-out of preserved logs
412 bool gc = pruneRows <= 1;
413 if (!gc && (worst != -1)) {
414 { // begin scope for worst found iterator
415 LogBufferIteratorMap::iterator found = mLastWorst[id].find(worst);
416 if (found != mLastWorst[id].end() && found->second != logs().end()) {
417 leading = false;
418 it = found->second;
419 }
420 }
421 if (worstPid) { // begin scope for pid worst found iterator
422 // FYI: worstPid only set if !LOG_ID_EVENTS and
423 // !LOG_ID_SECURITY, not going to make that assumption ...
424 LogBufferPidIteratorMap::iterator found = mLastWorstPidOfSystem[id].find(worstPid);
425 if (found != mLastWorstPidOfSystem[id].end() && found->second != logs().end()) {
426 leading = false;
427 it = found->second;
428 }
429 }
430 }
431 if (leading) {
432 it = GetOldest(id);
433 }
434 static const log_time too_old{EXPIRE_HOUR_THRESHOLD * 60 * 60, 0};
435 LogBufferElementCollection::iterator lastt;
436 lastt = logs().end();
437 --lastt;
438 LogBufferElementLast last;
439 while (it != logs().end()) {
440 LogBufferElement& element = *it;
441
442 if (oldest && oldest->start() <= element.sequence()) {
443 // Do not let chatty eliding trigger any reader mitigation
444 break;
445 }
446
447 if (element.log_id() != id) {
448 ++it;
449 continue;
450 }
451 // below this point element->log_id() == id
452
453 uint16_t dropped = element.dropped_count();
454
455 // remove any leading drops
456 if (leading && dropped) {
457 it = Erase(it);
458 continue;
459 }
460
461 if (dropped && last.coalesce(&element, dropped)) {
462 it = Erase(it, true);
463 continue;
464 }
465
466 int key = (id == LOG_ID_EVENTS || id == LOG_ID_SECURITY) ? element.GetTag()
467 : element.uid();
468
469 if (check_high_priority && prune_->IsHighPriority(&element)) {
470 last.clear(&element);
471 it = Erase(it);
472 if (dropped) {
473 continue;
474 }
475
476 pruneRows--;
477 if (pruneRows == 0) {
478 break;
479 }
480
481 if (key == worst) {
482 kick = true;
483 if (worst_sizes < second_worst_sizes) {
484 break;
485 }
486 worst_sizes -= element.msg_len();
487 }
488 continue;
489 }
490
491 if (element.realtime() < (lastt->realtime() - too_old) ||
492 element.realtime() > lastt->realtime()) {
493 break;
494 }
495
496 if (dropped) {
497 last.add(&element);
498 if (worstPid && ((!gc && element.pid() == worstPid) ||
499 mLastWorstPidOfSystem[id].find(element.pid()) ==
500 mLastWorstPidOfSystem[id].end())) {
501 // element->uid() may not be AID_SYSTEM, next best
502 // watermark if current one empty. id is not LOG_ID_EVENTS
503 // or LOG_ID_SECURITY because of worstPid check.
504 mLastWorstPidOfSystem[id][element.pid()] = it;
505 }
506 if ((!gc && !worstPid && (key == worst)) ||
507 (mLastWorst[id].find(key) == mLastWorst[id].end())) {
508 mLastWorst[id][key] = it;
509 }
510 ++it;
511 continue;
512 }
513
514 if (key != worst || (worstPid && element.pid() != worstPid)) {
515 leading = false;
516 last.clear(&element);
517 ++it;
518 continue;
519 }
520 // key == worst below here
521 // If worstPid set, then element->pid() == worstPid below here
522
523 pruneRows--;
524 if (pruneRows == 0) {
525 break;
526 }
527
528 kick = true;
529
530 uint16_t len = element.msg_len();
531
532 // do not create any leading drops
533 if (leading) {
534 it = Erase(it);
535 } else {
536 stats()->Drop(element.ToLogStatisticsElement());
537 element.SetDropped(1);
538 if (last.coalesce(&element, 1)) {
539 it = Erase(it, true);
540 } else {
541 last.add(&element);
542 if (worstPid && (!gc || mLastWorstPidOfSystem[id].find(worstPid) ==
543 mLastWorstPidOfSystem[id].end())) {
544 // element->uid() may not be AID_SYSTEM, next best
545 // watermark if current one empty. id is not
546 // LOG_ID_EVENTS or LOG_ID_SECURITY because of worstPid.
547 mLastWorstPidOfSystem[id][worstPid] = it;
548 }
549 if ((!gc && !worstPid) || mLastWorst[id].find(worst) == mLastWorst[id].end()) {
550 mLastWorst[id][worst] = it;
551 }
552 ++it;
553 }
554 }
555 if (worst_sizes < second_worst_sizes) {
556 break;
557 }
558 worst_sizes -= len;
559 }
560 last.clear();
561
562 if (!kick || !prune_->worst_uid_enabled()) {
563 break; // the following loop will ask bad clients to skip/drop
564 }
565 }
566
567 // Second prune pass.
568 bool skipped_low_priority_prune = false;
569 bool check_low_priority =
570 id != LOG_ID_SECURITY && prune_->HasLowPriorityPruneRules() && !clearAll;
571 it = GetOldest(id);
572 while (pruneRows > 0 && it != logs().end()) {
573 LogBufferElement& element = *it;
574
575 if (element.log_id() != id) {
576 it++;
577 continue;
578 }
579
580 if (oldest && oldest->start() <= element.sequence()) {
581 if (!skipped_low_priority_prune) KickReader(oldest, id, pruneRows);
582 break;
583 }
584
585 if (check_low_priority && !element.dropped_count() && prune_->IsLowPriority(&element)) {
586 skipped_low_priority_prune = true;
587 it++;
588 continue;
589 }
590
591 it = Erase(it);
592 pruneRows--;
593 }
594
595 // Third prune pass.
596 if (skipped_low_priority_prune && pruneRows > 0) {
597 it = GetOldest(id);
598 while (it != logs().end() && pruneRows > 0) {
599 LogBufferElement& element = *it;
600
601 if (element.log_id() != id) {
602 ++it;
603 continue;
604 }
605
606 if (oldest && oldest->start() <= element.sequence()) {
607 KickReader(oldest, id, pruneRows);
608 break;
609 }
610
611 it = Erase(it);
612 pruneRows--;
613 }
614 }
615
616 return pruneRows == 0 || it == logs().end();
617 }
618