• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/common/metrics_helpers.h"
6 
7 #if defined(USE_SYSTEM_LIBBZ2)
8 #include <bzlib.h>
9 #else
10 #include "third_party/bzip2/bzlib.h"
11 #endif
12 
13 #include "base/base64.h"
14 #include "base/time.h"
15 #include "base/basictypes.h"
16 #include "base/file_util.h"
17 #include "base/md5.h"
18 #include "base/perftimer.h"
19 #include "base/string_number_conversions.h"
20 #include "base/sys_info.h"
21 #include "base/utf_string_conversions.h"
22 #include "base/third_party/nspr/prtime.h"
23 #include "chrome/common/logging_chrome.h"
24 #include "googleurl/src/gurl.h"
25 #include "libxml/xmlwriter.h"
26 
27 #define OPEN_ELEMENT_FOR_SCOPE(name) ScopedElement scoped_element(this, name)
28 
29 using base::Histogram;
30 using base::StatisticsRecorder;
31 using base::Time;
32 using base::TimeDelta;
33 
34 // http://blogs.msdn.com/oldnewthing/archive/2004/10/25/247180.aspx
35 #if defined(OS_WIN)
36 extern "C" IMAGE_DOS_HEADER __ImageBase;
37 #endif
38 
39 namespace {
40 
41 // libxml take xmlChar*, which is unsigned char*
UnsignedChar(const char * input)42 inline const unsigned char* UnsignedChar(const char* input) {
43   return reinterpret_cast<const unsigned char*>(input);
44 }
45 
46 }  // namespace
47 
48 class MetricsLogBase::XmlWrapper {
49  public:
XmlWrapper()50   XmlWrapper()
51       : doc_(NULL),
52         buffer_(NULL),
53         writer_(NULL) {
54     buffer_ = xmlBufferCreate();
55     CHECK(buffer_);
56 
57     #if defined(OS_CHROMEOS)
58       writer_ = xmlNewTextWriterDoc(&doc_, /* compression */ 0);
59     #else
60       writer_ = xmlNewTextWriterMemory(buffer_, /* compression */ 0);
61     #endif  // OS_CHROMEOS
62     DCHECK(writer_);
63 
64     int result = xmlTextWriterSetIndent(writer_, 2);
65     DCHECK_EQ(0, result);
66   }
67 
~XmlWrapper()68   ~XmlWrapper() {
69     FreeDocWriter();
70     if (buffer_) {
71       xmlBufferFree(buffer_);
72       buffer_ = NULL;
73     }
74   }
75 
FreeDocWriter()76   void FreeDocWriter() {
77     if (writer_) {
78       xmlFreeTextWriter(writer_);
79       writer_ = NULL;
80     }
81     if (doc_) {
82       xmlFreeDoc(doc_);
83       doc_ = NULL;
84     }
85   }
86 
doc() const87   xmlDocPtr doc() const { return doc_; }
writer() const88   xmlTextWriterPtr writer() const { return writer_; }
buffer() const89   xmlBufferPtr buffer() const { return buffer_; }
90 
91  private:
92   xmlDocPtr doc_;
93   xmlBufferPtr buffer_;
94   xmlTextWriterPtr writer_;
95 };
96 
97 // static
98 std::string MetricsLogBase::version_extension_;
99 
MetricsLogBase(const std::string & client_id,int session_id,const std::string & version_string)100 MetricsLogBase::MetricsLogBase(const std::string& client_id, int session_id,
101                                const std::string& version_string)
102     : start_time_(Time::Now()),
103       client_id_(client_id),
104       session_id_(base::IntToString(session_id)),
105       locked_(false),
106       xml_wrapper_(new XmlWrapper),
107       num_events_(0) {
108 
109   StartElement("log");
110   WriteAttribute("clientid", client_id_);
111   WriteInt64Attribute("buildtime", GetBuildTime());
112   WriteAttribute("appversion", version_string);
113 }
114 
~MetricsLogBase()115 MetricsLogBase::~MetricsLogBase() {
116   delete xml_wrapper_;
117 }
118 
CloseLog()119 void MetricsLogBase::CloseLog() {
120   DCHECK(!locked_);
121   locked_ = true;
122 
123   int result = xmlTextWriterEndDocument(xml_wrapper_->writer());
124   DCHECK_GE(result, 0);
125 
126   result = xmlTextWriterFlush(xml_wrapper_->writer());
127   DCHECK_GE(result, 0);
128 
129 #if defined(OS_CHROMEOS)
130   xmlNodePtr root = xmlDocGetRootElement(xml_wrapper_->doc());
131   if (!hardware_class_.empty()) {
132     // The hardware class is determined after the first ongoing log is
133     // constructed, so this adds the root element's "hardwareclass"
134     // attribute when the log is closed instead.
135     xmlNewProp(root, UnsignedChar("hardwareclass"),
136                UnsignedChar(hardware_class_.c_str()));
137   }
138 
139   // Flattens the XML tree into a character buffer.
140   PerfTimer dump_timer;
141   result = xmlNodeDump(xml_wrapper_->buffer(), xml_wrapper_->doc(),
142                        root, /* level */ 0, /* format */ 1);
143   DCHECK_GE(result, 0);
144   UMA_HISTOGRAM_TIMES("UMA.XMLNodeDumpTime", dump_timer.Elapsed());
145 
146   PerfTimer free_timer;
147   xml_wrapper_->FreeDocWriter();
148   UMA_HISTOGRAM_TIMES("UMA.XMLWriterDestructionTime", free_timer.Elapsed());
149 #endif  // OS_CHROMEOS
150 }
151 
GetEncodedLogSize()152 int MetricsLogBase::GetEncodedLogSize() {
153   DCHECK(locked_);
154   CHECK(xml_wrapper_);
155   CHECK(xml_wrapper_->buffer());
156   return xml_wrapper_->buffer()->use;
157 }
158 
GetEncodedLog(char * buffer,int buffer_size)159 bool MetricsLogBase::GetEncodedLog(char* buffer, int buffer_size) {
160   DCHECK(locked_);
161   if (buffer_size < GetEncodedLogSize())
162     return false;
163 
164   memcpy(buffer, xml_wrapper_->buffer()->content, GetEncodedLogSize());
165   return true;
166 }
167 
GetEncodedLogString()168 std::string MetricsLogBase::GetEncodedLogString() {
169   DCHECK(locked_);
170   return std::string(reinterpret_cast<char*>(xml_wrapper_->buffer()->content));
171 }
172 
GetElapsedSeconds()173 int MetricsLogBase::GetElapsedSeconds() {
174   return static_cast<int>((Time::Now() - start_time_).InSeconds());
175 }
176 
CreateHash(const std::string & value)177 std::string MetricsLogBase::CreateHash(const std::string& value) {
178   MD5Context ctx;
179   MD5Init(&ctx);
180   MD5Update(&ctx, value.data(), value.length());
181 
182   MD5Digest digest;
183   MD5Final(&digest, &ctx);
184 
185   uint64 reverse_uint64;
186   // UMA only uses first 8 chars of hash. We use the above uint64 instead
187   // of a unsigned char[8] so that we don't run into strict aliasing issues
188   // in the LOG statement below when trying to interpret reverse as a uint64.
189   unsigned char* reverse = reinterpret_cast<unsigned char *>(&reverse_uint64);
190   DCHECK(arraysize(digest.a) >= sizeof(reverse_uint64));
191   for (size_t i = 0; i < sizeof(reverse_uint64); ++i)
192     reverse[i] = digest.a[sizeof(reverse_uint64) - i - 1];
193   // The following log is VERY helpful when folks add some named histogram into
194   // the code, but forgot to update the descriptive list of histograms.  When
195   // that happens, all we get to see (server side) is a hash of the histogram
196   // name.  We can then use this logging to find out what histogram name was
197   // being hashed to a given MD5 value by just running the version of Chromium
198   // in question with --enable-logging.
199   VLOG(1) << "Metrics: Hash numeric [" << value
200           << "]=[" << reverse_uint64 << "]";
201   return std::string(reinterpret_cast<char*>(digest.a), arraysize(digest.a));
202 }
203 
CreateBase64Hash(const std::string & string)204 std::string MetricsLogBase::CreateBase64Hash(const std::string& string) {
205   std::string encoded_digest;
206   if (base::Base64Encode(CreateHash(string), &encoded_digest)) {
207     DVLOG(1) << "Metrics: Hash [" << encoded_digest << "]=[" << string << "]";
208     return encoded_digest;
209   }
210   return std::string();
211 }
212 
RecordUserAction(const char * key)213 void MetricsLogBase::RecordUserAction(const char* key) {
214   DCHECK(!locked_);
215 
216   std::string command_hash = CreateBase64Hash(key);
217   if (command_hash.empty()) {
218     NOTREACHED() << "Unable generate encoded hash of command: " << key;
219     return;
220   }
221 
222   OPEN_ELEMENT_FOR_SCOPE("uielement");
223   WriteAttribute("action", "command");
224   WriteAttribute("targetidhash", command_hash);
225 
226   // TODO(jhughes): Properly track windows.
227   WriteIntAttribute("window", 0);
228   WriteCommonEventAttributes();
229 
230   ++num_events_;
231 }
232 
RecordLoadEvent(int window_id,const GURL & url,PageTransition::Type origin,int session_index,TimeDelta load_time)233 void MetricsLogBase::RecordLoadEvent(int window_id,
234                                  const GURL& url,
235                                  PageTransition::Type origin,
236                                  int session_index,
237                                  TimeDelta load_time) {
238   DCHECK(!locked_);
239 
240   OPEN_ELEMENT_FOR_SCOPE("document");
241   WriteAttribute("action", "load");
242   WriteIntAttribute("docid", session_index);
243   WriteIntAttribute("window", window_id);
244   WriteAttribute("loadtime", base::Int64ToString(load_time.InMilliseconds()));
245 
246   std::string origin_string;
247 
248   switch (PageTransition::StripQualifier(origin)) {
249     // TODO(jhughes): Some of these mappings aren't right... we need to add
250     // some values to the server's enum.
251     case PageTransition::LINK:
252     case PageTransition::MANUAL_SUBFRAME:
253       origin_string = "link";
254       break;
255 
256     case PageTransition::TYPED:
257       origin_string = "typed";
258       break;
259 
260     case PageTransition::AUTO_BOOKMARK:
261       origin_string = "bookmark";
262       break;
263 
264     case PageTransition::AUTO_SUBFRAME:
265     case PageTransition::RELOAD:
266       origin_string = "refresh";
267       break;
268 
269     case PageTransition::GENERATED:
270     case PageTransition::KEYWORD:
271       origin_string = "global-history";
272       break;
273 
274     case PageTransition::START_PAGE:
275       origin_string = "start-page";
276       break;
277 
278     case PageTransition::FORM_SUBMIT:
279       origin_string = "form-submit";
280       break;
281 
282     default:
283       NOTREACHED() << "Received an unknown page transition type: " <<
284                        PageTransition::StripQualifier(origin);
285   }
286   if (!origin_string.empty())
287     WriteAttribute("origin", origin_string);
288 
289   WriteCommonEventAttributes();
290 
291   ++num_events_;
292 }
293 
RecordWindowEvent(WindowEventType type,int window_id,int parent_id)294 void MetricsLogBase::RecordWindowEvent(WindowEventType type,
295                                    int window_id,
296                                    int parent_id) {
297   DCHECK(!locked_);
298 
299   OPEN_ELEMENT_FOR_SCOPE("window");
300   WriteAttribute("action", WindowEventTypeToString(type));
301   WriteAttribute("windowid", base::IntToString(window_id));
302   if (parent_id >= 0)
303     WriteAttribute("parent", base::IntToString(parent_id));
304   WriteCommonEventAttributes();
305 
306   ++num_events_;
307 }
308 
GetCurrentTimeString()309 std::string MetricsLogBase::GetCurrentTimeString() {
310   return base::Uint64ToString(Time::Now().ToTimeT());
311 }
312 
313 // These are the attributes that are common to every event.
WriteCommonEventAttributes()314 void MetricsLogBase::WriteCommonEventAttributes() {
315   WriteAttribute("session", session_id_);
316   WriteAttribute("time", GetCurrentTimeString());
317 }
318 
WriteAttribute(const std::string & name,const std::string & value)319 void MetricsLogBase::WriteAttribute(const std::string& name,
320                                 const std::string& value) {
321   DCHECK(!locked_);
322   DCHECK(!name.empty());
323 
324   int result = xmlTextWriterWriteAttribute(xml_wrapper_->writer(),
325                                            UnsignedChar(name.c_str()),
326                                            UnsignedChar(value.c_str()));
327   DCHECK_GE(result, 0);
328 }
329 
WriteIntAttribute(const std::string & name,int value)330 void MetricsLogBase::WriteIntAttribute(const std::string& name, int value) {
331   WriteAttribute(name, base::IntToString(value));
332 }
333 
WriteInt64Attribute(const std::string & name,int64 value)334 void MetricsLogBase::WriteInt64Attribute(const std::string& name, int64 value) {
335   WriteAttribute(name, base::Int64ToString(value));
336 }
337 
338 // static
WindowEventTypeToString(WindowEventType type)339 const char* MetricsLogBase::WindowEventTypeToString(WindowEventType type) {
340   switch (type) {
341     case WINDOW_CREATE:  return "create";
342     case WINDOW_OPEN:    return "open";
343     case WINDOW_CLOSE:   return "close";
344     case WINDOW_DESTROY: return "destroy";
345 
346     default:
347       NOTREACHED();
348       return "unknown";  // Can't return NULL as this is used in a required
349                          // attribute.
350   }
351 }
352 
StartElement(const char * name)353 void MetricsLogBase::StartElement(const char* name) {
354   DCHECK(!locked_);
355   DCHECK(name);
356 
357   int result = xmlTextWriterStartElement(xml_wrapper_->writer(),
358                                          UnsignedChar(name));
359   DCHECK_GE(result, 0);
360 }
361 
EndElement()362 void MetricsLogBase::EndElement() {
363   DCHECK(!locked_);
364 
365   int result = xmlTextWriterEndElement(xml_wrapper_->writer());
366   DCHECK_GE(result, 0);
367 }
368 
369 // static
GetBuildTime()370 int64 MetricsLogBase::GetBuildTime() {
371   static int64 integral_build_time = 0;
372   if (!integral_build_time) {
373     Time time;
374     const char* kDateTime = __DATE__ " " __TIME__ " GMT";
375     bool result = Time::FromString(ASCIIToWide(kDateTime).c_str(), &time);
376     DCHECK(result);
377     integral_build_time = static_cast<int64>(time.ToTimeT());
378   }
379   return integral_build_time;
380 }
381 
AsMetricsLog()382 MetricsLog* MetricsLogBase::AsMetricsLog() {
383   return NULL;
384 }
385 
386 // TODO(JAR): A The following should really be part of the histogram class.
387 // Internal state is being needlessly exposed, and it would be hard to reuse
388 // this code. If we moved this into the Histogram class, then we could use
389 // the same infrastructure for logging StatsCounters, RatesCounters, etc.
RecordHistogramDelta(const Histogram & histogram,const Histogram::SampleSet & snapshot)390 void MetricsLogBase::RecordHistogramDelta(
391     const Histogram& histogram,
392     const Histogram::SampleSet& snapshot) {
393   DCHECK(!locked_);
394   DCHECK_NE(0, snapshot.TotalCount());
395   snapshot.CheckSize(histogram);
396 
397   // We will ignore the MAX_INT/infinite value in the last element of range[].
398 
399   OPEN_ELEMENT_FOR_SCOPE("histogram");
400 
401   WriteAttribute("name", CreateBase64Hash(histogram.histogram_name()));
402 
403   WriteInt64Attribute("sum", snapshot.sum());
404   // TODO(jar): Remove sumsquares when protobuffer accepts this as optional.
405   WriteInt64Attribute("sumsquares", 0);
406 
407   for (size_t i = 0; i < histogram.bucket_count(); i++) {
408     if (snapshot.counts(i)) {
409       OPEN_ELEMENT_FOR_SCOPE("histogrambucket");
410       WriteIntAttribute("min", histogram.ranges(i));
411       WriteIntAttribute("max", histogram.ranges(i + 1));
412       WriteIntAttribute("count", snapshot.counts(i));
413     }
414   }
415 }
416 
417 
418 // MetricsServiceBase
MetricsServiceBase()419 MetricsServiceBase::MetricsServiceBase()
420     : pending_log_(NULL),
421       compressed_log_(),
422       current_log_(NULL) {
423 }
424 
~MetricsServiceBase()425 MetricsServiceBase::~MetricsServiceBase() {
426   if (pending_log_) {
427     delete pending_log_;
428     pending_log_ = NULL;
429   }
430   if (current_log_) {
431     delete current_log_;
432     current_log_ = NULL;
433   }
434 }
435 
436 // This implementation is based on the Firefox MetricsService implementation.
Bzip2Compress(const std::string & input,std::string * output)437 bool MetricsServiceBase::Bzip2Compress(const std::string& input,
438                                        std::string* output) {
439   bz_stream stream = {0};
440   // As long as our input is smaller than the bzip2 block size, we should get
441   // the best compression.  For example, if your input was 250k, using a block
442   // size of 300k or 500k should result in the same compression ratio.  Since
443   // our data should be under 100k, using the minimum block size of 100k should
444   // allocate less temporary memory, but result in the same compression ratio.
445   int result = BZ2_bzCompressInit(&stream,
446                                   1,   // 100k (min) block size
447                                   0,   // quiet
448                                   0);  // default "work factor"
449   if (result != BZ_OK) {  // out of memory?
450     return false;
451   }
452 
453   output->clear();
454 
455   stream.next_in = const_cast<char*>(input.data());
456   stream.avail_in = static_cast<int>(input.size());
457   // NOTE: we don't need a BZ_RUN phase since our input buffer contains
458   //       the entire input
459   do {
460     output->resize(output->size() + 1024);
461     stream.next_out = &((*output)[stream.total_out_lo32]);
462     stream.avail_out = static_cast<int>(output->size()) - stream.total_out_lo32;
463     result = BZ2_bzCompress(&stream, BZ_FINISH);
464   } while (result == BZ_FINISH_OK);
465   if (result != BZ_STREAM_END)  // unknown failure?
466     return false;
467   result = BZ2_bzCompressEnd(&stream);
468   DCHECK(result == BZ_OK);
469 
470   output->resize(stream.total_out_lo32);
471 
472   return true;
473 }
474 
DiscardPendingLog()475 void MetricsServiceBase::DiscardPendingLog() {
476   if (pending_log_) {  // Shutdown might have deleted it!
477     delete pending_log_;
478     pending_log_ = NULL;
479   }
480   compressed_log_.clear();
481 }
482 
RecordCurrentHistograms()483 void MetricsServiceBase::RecordCurrentHistograms() {
484   DCHECK(current_log_);
485   TransmitAllHistograms(base::Histogram::kNoFlags, true);
486 }
487 
TransmitHistogramDelta(const base::Histogram & histogram,const base::Histogram::SampleSet & snapshot)488 void MetricsServiceBase::TransmitHistogramDelta(
489       const base::Histogram& histogram,
490       const base::Histogram::SampleSet& snapshot) {
491   current_log_->RecordHistogramDelta(histogram, snapshot);
492 }
493 
InconsistencyDetected(int problem)494 void MetricsServiceBase::InconsistencyDetected(int problem) {
495   UMA_HISTOGRAM_ENUMERATION("Histogram.InconsistenciesBrowser",
496                             problem, Histogram::NEVER_EXCEEDED_VALUE);
497 }
498 
UniqueInconsistencyDetected(int problem)499 void MetricsServiceBase::UniqueInconsistencyDetected(int problem) {
500   UMA_HISTOGRAM_ENUMERATION("Histogram.InconsistenciesBrowserUnique",
501                             problem, Histogram::NEVER_EXCEEDED_VALUE);
502 }
503 
SnapshotProblemResolved(int amount)504 void MetricsServiceBase::SnapshotProblemResolved(int amount) {
505   UMA_HISTOGRAM_COUNTS("Histogram.InconsistentSnapshotBrowser",
506                        std::abs(amount));
507 }
508 
HistogramSender()509 HistogramSender::HistogramSender() {}
510 
~HistogramSender()511 HistogramSender::~HistogramSender() {}
512 
TransmitAllHistograms(Histogram::Flags flag_to_set,bool send_only_uma)513 void HistogramSender::TransmitAllHistograms(Histogram::Flags flag_to_set,
514                                             bool send_only_uma) {
515   StatisticsRecorder::Histograms histograms;
516   StatisticsRecorder::GetHistograms(&histograms);
517   for (StatisticsRecorder::Histograms::const_iterator it = histograms.begin();
518        histograms.end() != it;
519        ++it) {
520     (*it)->SetFlags(flag_to_set);
521     if (send_only_uma &&
522         0 == ((*it)->flags() & Histogram::kUmaTargetedHistogramFlag))
523       continue;
524     TransmitHistogram(**it);
525   }
526 }
527 
TransmitHistogram(const Histogram & histogram)528 void HistogramSender::TransmitHistogram(const Histogram& histogram) {
529   // Get up-to-date snapshot of sample stats.
530   Histogram::SampleSet snapshot;
531   histogram.SnapshotSample(&snapshot);
532   const std::string& histogram_name = histogram.histogram_name();
533 
534   int corruption = histogram.FindCorruption(snapshot);
535 
536   // Crash if we detect that our histograms have been overwritten.  This may be
537   // a fair distance from the memory smasher, but we hope to correlate these
538   // crashes with other events, such as plugins, or usage patterns, etc.
539   if (Histogram::BUCKET_ORDER_ERROR & corruption) {
540     // The checksum should have caught this, so crash separately if it didn't.
541     CHECK_NE(0, Histogram::RANGE_CHECKSUM_ERROR & corruption);
542     CHECK(false);  // Crash for the bucket order corruption.
543   }
544   // Checksum corruption might not have caused order corruption.
545   CHECK_EQ(0, Histogram::RANGE_CHECKSUM_ERROR & corruption);
546 
547   if (corruption) {
548     NOTREACHED();
549     InconsistencyDetected(corruption);
550     // Don't send corrupt data to metrics survices.
551     if (NULL == inconsistencies_.get())
552       inconsistencies_.reset(new ProblemMap);
553     int old_corruption = (*inconsistencies_)[histogram_name];
554     if (old_corruption == (corruption | old_corruption))
555       return;  // We've already seen this corruption for this histogram.
556     (*inconsistencies_)[histogram_name] |= corruption;
557     UniqueInconsistencyDetected(corruption);
558     return;
559   }
560 
561   // Find the already sent stats, or create an empty set.  Remove from our
562   // snapshot anything that we've already sent.
563   LoggedSampleMap::iterator it = logged_samples_.find(histogram_name);
564   Histogram::SampleSet* already_logged;
565   if (logged_samples_.end() == it) {
566     // Add new entry
567     already_logged = &logged_samples_[histogram.histogram_name()];
568     already_logged->Resize(histogram);  // Complete initialization.
569   } else {
570     already_logged = &(it->second);
571     int64 discrepancy(already_logged->TotalCount() -
572                     already_logged->redundant_count());
573     if (discrepancy) {
574       NOTREACHED();  // Already_logged has become corrupt.
575       int problem = static_cast<int>(discrepancy);
576       if (problem != discrepancy)
577         problem = INT_MAX;
578       SnapshotProblemResolved(problem);
579       // With no valid baseline, we'll act like we've sent everything in our
580       // snapshot.
581       already_logged->Subtract(*already_logged);
582       already_logged->Add(snapshot);
583     }
584     // Deduct any stats we've already logged from our snapshot.
585     snapshot.Subtract(*already_logged);
586   }
587 
588   // Snapshot now contains only a delta to what we've already_logged.
589   if (snapshot.redundant_count() > 0) {
590     TransmitHistogramDelta(histogram, snapshot);
591     // Add new data into our running total.
592     already_logged->Add(snapshot);
593   }
594 }
595