• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/metrics/persisted_logs.h"
6 
7 #include <string>
8 
9 #include "base/base64.h"
10 #include "base/md5.h"
11 #include "base/metrics/histogram.h"
12 #include "base/prefs/pref_service.h"
13 #include "base/prefs/scoped_user_pref_update.h"
14 #include "base/sha1.h"
15 #include "base/timer/elapsed_timer.h"
16 #include "components/metrics/compression_utils.h"
17 
18 namespace metrics {
19 
20 namespace {
21 
MakeRecallStatusHistogram(PersistedLogs::LogReadStatus status)22 PersistedLogs::LogReadStatus MakeRecallStatusHistogram(
23     PersistedLogs::LogReadStatus status) {
24   UMA_HISTOGRAM_ENUMERATION("PrefService.PersistentLogRecallProtobufs",
25                             status, PersistedLogs::END_RECALL_STATUS);
26   return status;
27 }
28 
29 // Reads the value at |index| from |list_value| as a string and Base64-decodes
30 // it into |result|. Returns true on success.
ReadBase64String(const base::ListValue & list_value,size_t index,std::string * result)31 bool ReadBase64String(const base::ListValue& list_value,
32                       size_t index,
33                       std::string* result) {
34   std::string base64_result;
35   if (!list_value.GetString(index, &base64_result))
36     return false;
37   return base::Base64Decode(base64_result, result);
38 }
39 
40 // Base64-encodes |str| and appends the result to |list_value|.
AppendBase64String(const std::string & str,base::ListValue * list_value)41 void AppendBase64String(const std::string& str, base::ListValue* list_value) {
42   std::string base64_str;
43   base::Base64Encode(str, &base64_str);
44   list_value->Append(base::Value::CreateStringValue(base64_str));
45 }
46 
47 }  // namespace
48 
Init(const std::string & log_data)49 void PersistedLogs::LogHashPair::Init(const std::string& log_data) {
50   DCHECK(!log_data.empty());
51 
52   if (!GzipCompress(log_data, &compressed_log_data)) {
53     NOTREACHED();
54     return;
55   }
56 
57   UMA_HISTOGRAM_PERCENTAGE(
58       "UMA.ProtoCompressionRatio",
59       static_cast<int>(100 * compressed_log_data.size() / log_data.size()));
60   UMA_HISTOGRAM_CUSTOM_COUNTS(
61       "UMA.ProtoGzippedKBSaved",
62       static_cast<int>((log_data.size() - compressed_log_data.size()) / 1024),
63       1, 2000, 50);
64 
65   hash = base::SHA1HashString(log_data);
66 }
67 
Clear()68 void PersistedLogs::LogHashPair::Clear() {
69   compressed_log_data.clear();
70   hash.clear();
71 }
72 
Swap(PersistedLogs::LogHashPair * input)73 void PersistedLogs::LogHashPair::Swap(PersistedLogs::LogHashPair* input) {
74   compressed_log_data.swap(input->compressed_log_data);
75   hash.swap(input->hash);
76 }
77 
PersistedLogs(PrefService * local_state,const char * pref_name,const char * old_pref_name,size_t min_log_count,size_t min_log_bytes,size_t max_log_size)78 PersistedLogs::PersistedLogs(PrefService* local_state,
79                              const char* pref_name,
80                              const char* old_pref_name,
81                              size_t min_log_count,
82                              size_t min_log_bytes,
83                              size_t max_log_size)
84     : local_state_(local_state),
85       pref_name_(pref_name),
86       old_pref_name_(old_pref_name),
87       min_log_count_(min_log_count),
88       min_log_bytes_(min_log_bytes),
89       max_log_size_(max_log_size),
90       last_provisional_store_index_(-1) {
91   DCHECK(local_state_);
92   // One of the limit arguments must be non-zero.
93   DCHECK(min_log_count_ > 0 || min_log_bytes_ > 0);
94 }
95 
~PersistedLogs()96 PersistedLogs::~PersistedLogs() {}
97 
SerializeLogs()98 void PersistedLogs::SerializeLogs() {
99   // Remove any logs that are over the serialization size limit.
100   if (max_log_size_) {
101     for (std::vector<LogHashPair>::iterator it = list_.begin();
102          it != list_.end();) {
103       size_t log_size = it->compressed_log_data.length();
104       if (log_size > max_log_size_) {
105         UMA_HISTOGRAM_COUNTS("UMA.Large Accumulated Log Not Persisted",
106                              static_cast<int>(log_size));
107         it = list_.erase(it);
108       } else {
109         ++it;
110       }
111     }
112   }
113 
114   ListPrefUpdate update(local_state_, pref_name_);
115   WriteLogsToPrefList(update.Get());
116 
117   // Clear the old pref now that we've written to the new one.
118   // TODO(asvitkine): Remove the old pref in M39.
119   local_state_->ClearPref(old_pref_name_);
120 }
121 
DeserializeLogs()122 PersistedLogs::LogReadStatus PersistedLogs::DeserializeLogs() {
123   // First, try reading from old pref. If it's empty, read from the new one.
124   // TODO(asvitkine): Remove the old pref in M39.
125   const base::ListValue* unsent_logs = local_state_->GetList(old_pref_name_);
126   if (!unsent_logs->empty())
127     return ReadLogsFromOldPrefList(*unsent_logs);
128 
129   unsent_logs = local_state_->GetList(pref_name_);
130   return ReadLogsFromPrefList(*unsent_logs);
131 }
132 
StoreLog(const std::string & log_data)133 void PersistedLogs::StoreLog(const std::string& log_data) {
134   list_.push_back(LogHashPair());
135   list_.back().Init(log_data);
136 }
137 
StageLog()138 void PersistedLogs::StageLog() {
139   // CHECK, rather than DCHECK, because swap()ing with an empty list causes
140   // hard-to-identify crashes much later.
141   CHECK(!list_.empty());
142   DCHECK(!has_staged_log());
143   staged_log_.Swap(&list_.back());
144   list_.pop_back();
145 
146   // If the staged log was the last provisional store, clear that.
147   if (static_cast<size_t>(last_provisional_store_index_) == list_.size())
148     last_provisional_store_index_ = -1;
149   DCHECK(has_staged_log());
150 }
151 
DiscardStagedLog()152 void PersistedLogs::DiscardStagedLog() {
153   DCHECK(has_staged_log());
154   staged_log_.Clear();
155 }
156 
StoreStagedLogAsUnsent(StoreType store_type)157 void PersistedLogs::StoreStagedLogAsUnsent(StoreType store_type) {
158   list_.push_back(LogHashPair());
159   list_.back().Swap(&staged_log_);
160   if (store_type == PROVISIONAL_STORE)
161     last_provisional_store_index_ = list_.size() - 1;
162 }
163 
DiscardLastProvisionalStore()164 void PersistedLogs::DiscardLastProvisionalStore() {
165   if (last_provisional_store_index_ == -1)
166     return;
167   DCHECK_LT(static_cast<size_t>(last_provisional_store_index_), list_.size());
168   list_.erase(list_.begin() + last_provisional_store_index_);
169   last_provisional_store_index_ = -1;
170 }
171 
WriteLogsToPrefList(base::ListValue * list_value)172 void PersistedLogs::WriteLogsToPrefList(base::ListValue* list_value) {
173   list_value->Clear();
174   // Leave the list completely empty if there are no storable values.
175   if (list_.empty())
176     return;
177 
178   size_t start = 0;
179   // If there are too many logs, keep the most recent logs up to the length
180   // limit, and at least to the minimum number of bytes.
181   if (list_.size() > min_log_count_) {
182     start = list_.size();
183     size_t bytes_used = 0;
184     std::vector<LogHashPair>::const_reverse_iterator end = list_.rend();
185     for (std::vector<LogHashPair>::const_reverse_iterator it = list_.rbegin();
186          it != end; ++it) {
187       const size_t log_size = it->compressed_log_data.length();
188       if (bytes_used >= min_log_bytes_ &&
189           (list_.size() - start) >= min_log_count_) {
190         break;
191       }
192       bytes_used += log_size;
193       --start;
194     }
195   }
196   DCHECK_LT(start, list_.size());
197 
198   for (size_t i = start; i < list_.size(); ++i) {
199     AppendBase64String(list_[i].compressed_log_data, list_value);
200     AppendBase64String(list_[i].hash, list_value);
201   }
202 }
203 
ReadLogsFromPrefList(const base::ListValue & list_value)204 PersistedLogs::LogReadStatus PersistedLogs::ReadLogsFromPrefList(
205     const base::ListValue& list_value) {
206   if (list_value.empty())
207     return MakeRecallStatusHistogram(LIST_EMPTY);
208 
209   // For each log, there's two entries in the list (the data and the hash).
210   DCHECK_EQ(0U, list_value.GetSize() % 2);
211   const size_t log_count = list_value.GetSize() / 2;
212 
213   // Resize |list_| ahead of time, so that values can be decoded directly into
214   // the elements of the list.
215   DCHECK(list_.empty());
216   list_.resize(log_count);
217 
218   for (size_t i = 0; i < log_count; ++i) {
219     if (!ReadBase64String(list_value, i * 2, &list_[i].compressed_log_data) ||
220         !ReadBase64String(list_value, i * 2 + 1, &list_[i].hash)) {
221       list_.clear();
222       return MakeRecallStatusHistogram(LOG_STRING_CORRUPTION);
223     }
224   }
225 
226   return MakeRecallStatusHistogram(RECALL_SUCCESS);
227 }
228 
ReadLogsFromOldPrefList(const base::ListValue & list_value)229 PersistedLogs::LogReadStatus PersistedLogs::ReadLogsFromOldPrefList(
230     const base::ListValue& list_value) {
231   // We append (2) more elements to persisted lists: the size of the list and a
232   // checksum of the elements.
233   const size_t kChecksumEntryCount = 2;
234 
235   if (list_value.GetSize() == 0)
236     return MakeRecallStatusHistogram(LIST_EMPTY);
237   if (list_value.GetSize() <= kChecksumEntryCount)
238     return MakeRecallStatusHistogram(LIST_SIZE_TOO_SMALL);
239 
240   // The size is stored at the beginning of the list_value.
241   int size;
242   bool valid = (*list_value.begin())->GetAsInteger(&size);
243   if (!valid)
244     return MakeRecallStatusHistogram(LIST_SIZE_MISSING);
245   // Account for checksum and size included in the list_value.
246   if (static_cast<size_t>(size) != list_value.GetSize() - kChecksumEntryCount)
247     return MakeRecallStatusHistogram(LIST_SIZE_CORRUPTION);
248 
249   // Allocate strings for all of the logs we are going to read in.
250   // Do this ahead of time so that we can decode the string values directly into
251   // the elements of |list_|, and thereby avoid making copies of the
252   // serialized logs, which can be fairly large.
253   DCHECK(list_.empty());
254   list_.resize(size);
255 
256   base::MD5Context ctx;
257   base::MD5Init(&ctx);
258   std::string encoded_log;
259   size_t local_index = 0;
260   for (base::ListValue::const_iterator it = list_value.begin() + 1;
261        it != list_value.end() - 1;  // Last element is the checksum.
262        ++it, ++local_index) {
263     bool valid = (*it)->GetAsString(&encoded_log);
264     if (!valid) {
265       list_.clear();
266       return MakeRecallStatusHistogram(LOG_STRING_CORRUPTION);
267     }
268 
269     base::MD5Update(&ctx, encoded_log);
270 
271     std::string log_text;
272     if (!base::Base64Decode(encoded_log, &log_text)) {
273       list_.clear();
274       return MakeRecallStatusHistogram(DECODE_FAIL);
275     }
276 
277     DCHECK_LT(local_index, list_.size());
278     list_[local_index].Init(log_text);
279   }
280 
281   // Verify checksum.
282   base::MD5Digest digest;
283   base::MD5Final(&digest, &ctx);
284   std::string recovered_md5;
285   // We store the hash at the end of the list_value.
286   valid = (*(list_value.end() - 1))->GetAsString(&recovered_md5);
287   if (!valid) {
288     list_.clear();
289     return MakeRecallStatusHistogram(CHECKSUM_STRING_CORRUPTION);
290   }
291   if (recovered_md5 != base::MD5DigestToBase16(digest)) {
292     list_.clear();
293     return MakeRecallStatusHistogram(CHECKSUM_CORRUPTION);
294   }
295   return MakeRecallStatusHistogram(RECALL_SUCCESS);
296 }
297 
298 }  // namespace metrics
299