• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/history/in_memory_url_index.h"
6 
7 #include <algorithm>
8 #include <iterator>
9 #include <limits>
10 #include <numeric>
11 
12 #include "base/file_util.h"
13 #include "base/i18n/break_iterator.h"
14 #include "base/metrics/histogram.h"
15 #include "base/string_util.h"
16 #include "base/time.h"
17 #include "base/utf_string_conversions.h"
18 #include "chrome/browser/autocomplete/autocomplete.h"
19 #include "chrome/browser/autocomplete/history_provider_util.h"
20 #include "chrome/browser/history/url_database.h"
21 #include "chrome/browser/profiles/profile.h"
22 #include "chrome/common/url_constants.h"
23 #include "googleurl/src/url_util.h"
24 #include "net/base/escape.h"
25 #include "net/base/net_util.h"
26 #include "third_party/protobuf/src/google/protobuf/repeated_field.h"
27 #include "ui/base/l10n/l10n_util.h"
28 
29 using google::protobuf::RepeatedField;
30 using google::protobuf::RepeatedPtrField;
31 using in_memory_url_index::InMemoryURLIndexCacheItem;
32 
33 namespace history {
34 
35 typedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
36 typedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
37 typedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
38 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;
39 typedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry
40     CharWordMapEntry;
41 typedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
42     WordIDHistoryMapItem;
43 typedef imui::
44     InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
45     WordIDHistoryMapEntry;
46 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
47 typedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
48     HistoryInfoMapEntry;
49 
50 const size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
51 
52 // Score ranges used to get a 'base' score for each of the scoring factors
53 // (such as recency of last visit, times visited, times the URL was typed,
54 // and the quality of the string match). There is a matching value range for
55 // each of these scores for each factor.
56 const int kScoreRank[] = { 1425, 1200, 900, 400 };
57 
ScoredHistoryMatch()58 ScoredHistoryMatch::ScoredHistoryMatch()
59     : raw_score(0),
60       prefix_adjust(0) {}
61 
ScoredHistoryMatch(const URLRow & url_info)62 ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
63     : HistoryMatch(url_info, 0, false, false),
64       raw_score(0),
65       prefix_adjust(0) {}
66 
~ScoredHistoryMatch()67 ScoredHistoryMatch::~ScoredHistoryMatch() {}
68 
69 // Comparison function for sorting ScoredMatches by their scores.
MatchScoreGreater(const ScoredHistoryMatch & m1,const ScoredHistoryMatch & m2)70 bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
71                                            const ScoredHistoryMatch& m2) {
72   return m1.raw_score >= m2.raw_score;
73 }
74 
75 struct InMemoryURLIndex::TermCharWordSet {
TermCharWordSethistory::InMemoryURLIndex::TermCharWordSet76   TermCharWordSet()  // Required for STL resize().
77       : char_(0),
78         word_id_set_(),
79         used_(false) {}
TermCharWordSethistory::InMemoryURLIndex::TermCharWordSet80   TermCharWordSet(const char16& uni_char,
81                   const WordIDSet& word_id_set,
82                   bool used)
83       : char_(uni_char),
84         word_id_set_(word_id_set),
85         used_(used) {}
86 
87   // Predicate for STL algorithm use.
is_not_usedhistory::InMemoryURLIndex::TermCharWordSet88   bool is_not_used() const { return !used_; }
89 
90   char16 char_;
91   WordIDSet word_id_set_;
92   bool used_;  // true if this set has been used for the current term search.
93 };
94 
95 // Comparison function for sorting TermMatches by their offsets.
MatchOffsetLess(const TermMatch & m1,const TermMatch & m2)96 bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
97   return m1.offset < m2.offset;
98 }
99 
100 // std::accumulate helper function to add up TermMatches' lengths.
AccumulateMatchLength(int total,const TermMatch & match)101 int AccumulateMatchLength(int total, const TermMatch& match) {
102   return total + match.length;
103 }
104 
105 // Converts a raw value for some particular scoring factor into a score
106 // component for that factor.  The conversion function is piecewise linear, with
107 // input values provided in |value_ranks| and resulting output scores from
108 // |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]).  A score
109 // cannot be higher than kScoreRank[0], and drops directly to 0 if lower than
110 // kScoreRank[3].
111 //
112 // For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }.
113 // Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the
114 // linear function:
115 //   score = m * value + b, where
116 //   m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1])
117 //   b = value_ranks[1]
118 // Any value higher than 100 would be scored as if it were 100, and any value
119 // lower than 10 scored 0.
ScoreForValue(int value,const int * value_ranks)120 int ScoreForValue(int value, const int* value_ranks) {
121   int i = 0;
122   int rank_count = arraysize(kScoreRank);
123   while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?
124          (value > value_ranks[i]) : (value < value_ranks[i])))
125     ++i;
126   if (i >= rank_count)
127     return 0;
128   int score = kScoreRank[i];
129   if (i > 0) {
130     score += (value - value_ranks[i]) *
131         (kScoreRank[i - 1] - kScoreRank[i]) /
132         (value_ranks[i - 1] - value_ranks[i]);
133   }
134   return score;
135 }
136 
InMemoryURLIndex(const FilePath & history_dir)137 InMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
138     : history_dir_(history_dir),
139       history_item_count_(0) {
140 }
141 
142 // Called only by unit tests.
InMemoryURLIndex()143 InMemoryURLIndex::InMemoryURLIndex()
144     : history_item_count_(0) {
145 }
146 
~InMemoryURLIndex()147 InMemoryURLIndex::~InMemoryURLIndex() {}
148 
149 // Indexing
150 
Init(history::URLDatabase * history_db,const std::string & languages)151 bool InMemoryURLIndex::Init(history::URLDatabase* history_db,
152                             const std::string& languages) {
153   // TODO(mrossetti): Register for profile/language change notifications.
154   languages_ = languages;
155   return ReloadFromHistory(history_db, false);
156 }
157 
ShutDown()158 void InMemoryURLIndex::ShutDown() {
159   // Write our cache.
160   SaveToCacheFile();
161 }
162 
IndexRow(const URLRow & row)163 bool InMemoryURLIndex::IndexRow(const URLRow& row) {
164   const GURL& gurl(row.url());
165   string16 url(net::FormatUrl(gurl, languages_,
166       net::kFormatUrlOmitUsernamePassword,
167       UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
168       NULL, NULL, NULL));
169 
170   HistoryID history_id = static_cast<HistoryID>(row.id());
171   DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());
172 
173   // Add the row for quick lookup in the history info store.
174   URLRow new_row(GURL(url), row.id());
175   new_row.set_visit_count(row.visit_count());
176   new_row.set_typed_count(row.typed_count());
177   new_row.set_last_visit(row.last_visit());
178   new_row.set_title(row.title());
179   history_info_map_[history_id] = new_row;
180 
181   // Split URL into individual, unique words then add in the title words.
182   url = l10n_util::ToLower(url);
183   String16Set url_words = WordSetFromString16(url);
184   String16Set title_words = WordSetFromString16(row.title());
185   String16Set words;
186   std::set_union(url_words.begin(), url_words.end(),
187                  title_words.begin(), title_words.end(),
188                  std::insert_iterator<String16Set>(words, words.begin()));
189   for (String16Set::iterator word_iter = words.begin();
190        word_iter != words.end(); ++word_iter)
191     AddWordToIndex(*word_iter, history_id);
192 
193   ++history_item_count_;
194   return true;
195 }
196 
ReloadFromHistory(history::URLDatabase * history_db,bool clear_cache)197 bool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
198                                          bool clear_cache) {
199   ClearPrivateData();
200 
201   if (!history_db)
202     return false;
203 
204   if (clear_cache || !RestoreFromCacheFile()) {
205     base::TimeTicks beginning_time = base::TimeTicks::Now();
206     // The index has to be built from scratch.
207     URLDatabase::URLEnumerator history_enum;
208     if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
209       return false;
210     URLRow row;
211     while (history_enum.GetNextURL(&row)) {
212       if (!IndexRow(row))
213         return false;
214     }
215     UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
216                         base::TimeTicks::Now() - beginning_time);
217     SaveToCacheFile();
218   }
219   return true;
220 }
221 
ClearPrivateData()222 void InMemoryURLIndex::ClearPrivateData() {
223   history_item_count_ = 0;
224   word_list_.clear();
225   word_map_.clear();
226   char_word_map_.clear();
227   word_id_history_map_.clear();
228   term_char_word_set_cache_.clear();
229   history_info_map_.clear();
230 }
231 
RestoreFromCacheFile()232 bool InMemoryURLIndex::RestoreFromCacheFile() {
233   // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
234   // That is: ensure that the database has not been modified since the cache
235   // was last saved. DB file modification date is inadequate. There are no
236   // SQLite table checksums automatically stored.
237   base::TimeTicks beginning_time = base::TimeTicks::Now();
238   FilePath file_path;
239   if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path))
240     return false;
241   std::string data;
242   if (!file_util::ReadFileToString(file_path, &data)) {
243     LOG(WARNING) << "Failed to read InMemoryURLIndex cache from "
244                  << file_path.value();
245     return false;
246   }
247 
248   InMemoryURLIndexCacheItem index_cache;
249   if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
250     LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "
251                  << file_path.value();
252     return false;
253   }
254 
255   if (!RestorePrivateData(index_cache)) {
256     ClearPrivateData();  // Back to square one -- must build from scratch.
257     return false;
258   }
259 
260   UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
261                       base::TimeTicks::Now() - beginning_time);
262   UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);
263   UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
264   UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
265   UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
266   return true;
267 }
268 
SaveToCacheFile()269 bool InMemoryURLIndex::SaveToCacheFile() {
270   base::TimeTicks beginning_time = base::TimeTicks::Now();
271   InMemoryURLIndexCacheItem index_cache;
272   SavePrivateData(&index_cache);
273   std::string data;
274   if (!index_cache.SerializeToString(&data)) {
275     LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
276     return false;
277   }
278 
279   // Write the cache to a file then swap it for the old cache, if any, and
280   // delete the old cache.
281   FilePath file_path;
282   if (!GetCacheFilePath(&file_path))
283     return false;
284   file_util::ScopedFILE file(file_util::OpenFile(file_path, "w"));
285   if (!file.get())
286     return false;
287 
288   int size = data.size();
289   if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
290     LOG(WARNING) << "Failed to write " << file_path.value();
291     return false;
292   }
293   UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
294                       base::TimeTicks::Now() - beginning_time);
295   return true;
296 }
297 
UpdateURL(URLID row_id,const URLRow & row)298 void InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
299   // The row may or may not already be in our index. If it is not already
300   // indexed and it qualifies then it gets indexed. If it is already
301   // indexed and still qualifies then it gets updated, otherwise it
302   // is deleted from the index.
303   HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
304   if (row_pos == history_info_map_.end()) {
305     // This new row should be indexed if it qualifies.
306     if (RowQualifiesAsSignificant(row, base::Time()))
307       IndexRow(row);
308   } else if (RowQualifiesAsSignificant(row, base::Time())) {
309     // This indexed row still qualifies and will be re-indexed.
310     // The url won't have changed but the title, visit count, etc.
311     // might have changed.
312     URLRow& old_row = row_pos->second;
313     old_row.set_visit_count(row.visit_count());
314     old_row.set_typed_count(row.typed_count());
315     old_row.set_last_visit(row.last_visit());
316     // TODO(mrossetti): When we start indexing the title the next line
317     // will need attention.
318     old_row.set_title(row.title());
319   } else {
320     // This indexed row no longer qualifies and will be de-indexed.
321     history_info_map_.erase(row_id);
322   }
323   // This invalidates the cache.
324   term_char_word_set_cache_.clear();
325   // TODO(mrossetti): Record this transaction in the cache.
326 }
327 
DeleteURL(URLID row_id)328 void InMemoryURLIndex::DeleteURL(URLID row_id) {
329   // Note that this does not remove any reference to this row from the
330   // word_id_history_map_. That map will continue to contain (and return)
331   // hits against this row until that map is rebuilt, but since the
332   // history_info_map_ no longer references the row no erroneous results
333   // will propagate to the user.
334   history_info_map_.erase(row_id);
335   // This invalidates the word cache.
336   term_char_word_set_cache_.clear();
337   // TODO(mrossetti): Record this transaction in the cache.
338 }
339 
340 // Searching
341 
HistoryItemsForTerms(const String16Vector & terms)342 ScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
343     const String16Vector& terms) {
344   ScoredHistoryMatches scored_items;
345   if (!terms.empty()) {
346     // Reset used_ flags for term_char_word_set_cache_. We use a basic mark-
347     // and-sweep approach.
348     ResetTermCharWordSetCache();
349 
350     // Lowercase the terms.
351     // TODO(mrossetti): Another opportunity for a transform algorithm.
352     String16Vector lower_terms;
353     for (String16Vector::const_iterator term_iter = terms.begin();
354          term_iter != terms.end(); ++term_iter)
355       lower_terms.push_back(l10n_util::ToLower(*term_iter));
356 
357     String16Vector::value_type all_terms(JoinString(lower_terms, ' '));
358     HistoryIDSet history_id_set = HistoryIDSetFromWords(all_terms);
359 
360     // Don't perform any scoring (and don't return any matches) if the
361     // candidate pool is large. (See comments in header.)
362     const size_t kItemsToScoreLimit = 500;
363     if (history_id_set.size() <= kItemsToScoreLimit) {
364       // Pass over all of the candidates filtering out any without a proper
365       // substring match, inserting those which pass in order by score.
366       scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
367           AddHistoryMatch(*this, lower_terms)).ScoredMatches();
368 
369       // Select and sort only the top kMaxMatches results.
370       if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
371         std::partial_sort(scored_items.begin(),
372                           scored_items.begin() +
373                               AutocompleteProvider::kMaxMatches,
374                           scored_items.end(),
375                           ScoredHistoryMatch::MatchScoreGreater);
376           scored_items.resize(AutocompleteProvider::kMaxMatches);
377       } else {
378         std::sort(scored_items.begin(), scored_items.end(),
379                   ScoredHistoryMatch::MatchScoreGreater);
380       }
381     }
382   }
383 
384   // Remove any stale TermCharWordSet's.
385   term_char_word_set_cache_.erase(
386       std::remove_if(term_char_word_set_cache_.begin(),
387                      term_char_word_set_cache_.end(),
388                      std::mem_fun_ref(&TermCharWordSet::is_not_used)),
389       term_char_word_set_cache_.end());
390   return scored_items;
391 }
392 
ResetTermCharWordSetCache()393 void InMemoryURLIndex::ResetTermCharWordSetCache() {
394   // TODO(mrossetti): Consider keeping more of the cache around for possible
395   // repeat searches, except a 'shortcuts' approach might be better for that.
396   // TODO(mrossetti): Change TermCharWordSet to a class and use for_each.
397   for (TermCharWordSetVector::iterator iter =
398        term_char_word_set_cache_.begin();
399        iter != term_char_word_set_cache_.end(); ++iter)
400     iter->used_ = false;
401 }
402 
HistoryIDSetFromWords(const string16 & uni_string)403 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
404     const string16& uni_string) {
405   // Break the terms down into individual terms (words), get the candidate
406   // set for each term, and intersect each to get a final candidate list.
407   // Note that a single 'term' from the user's perspective might be
408   // a string like "http://www.somewebsite.com" which, from our perspective,
409   // is four words: 'http', 'www', 'somewebsite', and 'com'.
410   HistoryIDSet history_id_set;
411   String16Set words = WordSetFromString16(uni_string);
412   bool first_word = true;
413   for (String16Set::iterator iter = words.begin();
414        iter != words.end(); ++iter) {
415     String16Set::value_type uni_word = *iter;
416     HistoryIDSet term_history_id_set =
417         InMemoryURLIndex::HistoryIDsForTerm(uni_word);
418     if (first_word) {
419       history_id_set.swap(term_history_id_set);
420       first_word = false;
421     } else {
422       HistoryIDSet old_history_id_set(history_id_set);
423       history_id_set.clear();
424       std::set_intersection(old_history_id_set.begin(),
425                             old_history_id_set.end(),
426                             term_history_id_set.begin(),
427                             term_history_id_set.end(),
428                             std::inserter(history_id_set,
429                                           history_id_set.begin()));
430     }
431   }
432   return history_id_set;
433 }
434 
HistoryIDsForTerm(const string16 & uni_word)435 InMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
436     const string16& uni_word) {
437   InMemoryURLIndex::HistoryIDSet history_id_set;
438 
439   // For each unique character in the word, in order of first appearance, get
440   // the char/word_id map entry and intersect with the set in an incremental
441   // manner.
442   Char16Vector uni_chars = Char16VectorFromString16(uni_word);
443   WordIDSet word_id_set(WordIDSetForTermChars(uni_chars));
444 
445   // TODO(mrossetti): At this point, as a possible optimization, we could
446   // scan through all candidate words and make sure the |uni_word| is a
447   // substring within the candidate words, eliminating those which aren't.
448   // I'm not sure it would be worth the effort. And remember, we've got to do
449   // a final substring match in order to get the highlighting ranges later
450   // in the process in any case.
451 
452   // If any words resulted then we can compose a set of history IDs by unioning
453   // the sets from each word.
454   if (!word_id_set.empty()) {
455     for (WordIDSet::iterator word_id_iter = word_id_set.begin();
456          word_id_iter != word_id_set.end(); ++word_id_iter) {
457       WordID word_id = *word_id_iter;
458       WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
459       if (word_iter != word_id_history_map_.end()) {
460         HistoryIDSet& word_history_id_set(word_iter->second);
461         history_id_set.insert(word_history_id_set.begin(),
462                               word_history_id_set.end());
463       }
464     }
465   }
466 
467   return history_id_set;
468 }
469 
470 // Utility Functions
471 
472 // static
WordSetFromString16(const string16 & uni_string)473 InMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
474     const string16& uni_string) {
475   String16Vector words = WordVectorFromString16(uni_string, false);
476   String16Set word_set;
477   for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
478        ++iter)
479     word_set.insert(l10n_util::ToLower(*iter));
480   return word_set;
481 }
482 
483 // static
WordVectorFromString16(const string16 & uni_string,bool break_on_space)484 InMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
485     const string16& uni_string,
486     bool break_on_space) {
487   base::BreakIterator iter(&uni_string, break_on_space ?
488       base::BreakIterator::BREAK_SPACE : base::BreakIterator::BREAK_WORD);
489   String16Vector words;
490   if (!iter.Init())
491     return words;
492   while (iter.Advance()) {
493     if (break_on_space || iter.IsWord()) {
494       string16 word = iter.GetString();
495       if (break_on_space)
496         TrimWhitespace(word, TRIM_ALL, &word);
497       if (!word.empty())
498         words.push_back(word);
499     }
500   }
501   return words;
502 }
503 
504 // static
Char16VectorFromString16(const string16 & uni_word)505 InMemoryURLIndex::Char16Vector InMemoryURLIndex::Char16VectorFromString16(
506     const string16& uni_word) {
507   InMemoryURLIndex::Char16Vector characters;
508   InMemoryURLIndex::Char16Set unique_characters;
509   for (string16::const_iterator iter = uni_word.begin();
510        iter != uni_word.end(); ++iter) {
511     if (!unique_characters.count(*iter)) {
512       unique_characters.insert(*iter);
513       characters.push_back(*iter);
514     }
515   }
516   return characters;
517 }
518 
519 // static
Char16SetFromString16(const string16 & uni_word)520 InMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
521     const string16& uni_word) {
522   Char16Set characters;
523   for (string16::const_iterator iter = uni_word.begin();
524        iter != uni_word.end(); ++iter)
525     characters.insert(*iter);
526   return characters;
527 }
528 
AddWordToIndex(const string16 & uni_word,HistoryID history_id)529 void InMemoryURLIndex::AddWordToIndex(const string16& uni_word,
530                                       HistoryID history_id) {
531   WordMap::iterator word_pos = word_map_.find(uni_word);
532   if (word_pos != word_map_.end())
533     UpdateWordHistory(word_pos->second, history_id);
534   else
535     AddWordHistory(uni_word, history_id);
536 }
537 
UpdateWordHistory(WordID word_id,HistoryID history_id)538 void InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
539     WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
540     DCHECK(history_pos != word_id_history_map_.end());
541     HistoryIDSet& history_id_set(history_pos->second);
542     history_id_set.insert(history_id);
543 }
544 
545 // Add a new word to the word list and the word map, and then create a
546 // new entry in the word/history map.
AddWordHistory(const string16 & uni_word,HistoryID history_id)547 void InMemoryURLIndex::AddWordHistory(const string16& uni_word,
548                                       HistoryID history_id) {
549   word_list_.push_back(uni_word);
550   WordID word_id = word_list_.size() - 1;
551   word_map_[uni_word] = word_id;
552   HistoryIDSet history_id_set;
553   history_id_set.insert(history_id);
554   word_id_history_map_[word_id] = history_id_set;
555   // For each character in the newly added word (i.e. a word that is not
556   // already in the word index), add the word to the character index.
557   Char16Set characters = Char16SetFromString16(uni_word);
558   for (Char16Set::iterator uni_char_iter = characters.begin();
559        uni_char_iter != characters.end(); ++uni_char_iter) {
560     Char16Set::value_type uni_char = *uni_char_iter;
561     CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
562     if (char_iter != char_word_map_.end()) {
563       // Update existing entry in the char/word index.
564       WordIDSet& word_id_set(char_iter->second);
565       word_id_set.insert(word_id);
566     } else {
567       // Create a new entry in the char/word index.
568       WordIDSet word_id_set;
569       word_id_set.insert(word_id);
570       char_word_map_[uni_char] = word_id_set;
571     }
572   }
573 }
574 
WordIDSetForTermChars(const InMemoryURLIndex::Char16Vector & uni_chars)575 InMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
576     const InMemoryURLIndex::Char16Vector& uni_chars) {
577   size_t index = CachedResultsIndexForTerm(uni_chars);
578 
579   // If there were no unprocessed characters in the search term |uni_chars|
580   // then we can use the cached one as-is as the results with no further
581   // filtering.
582   if (index != kNoCachedResultForTerm && index == uni_chars.size() - 1)
583     return term_char_word_set_cache_[index].word_id_set_;
584 
585   // Some or all of the characters remain to be indexed so trim the cache.
586   if (index + 1 < term_char_word_set_cache_.size())
587     term_char_word_set_cache_.resize(index + 1);
588   WordIDSet word_id_set;
589   // Take advantage of our cached starting point, if any.
590   Char16Vector::const_iterator c_iter = uni_chars.begin();
591   if (index != kNoCachedResultForTerm) {
592     word_id_set = term_char_word_set_cache_[index].word_id_set_;
593     c_iter += index + 1;
594   }
595   // Now process the remaining characters in the search term.
596   for (; c_iter != uni_chars.end(); ++c_iter) {
597     Char16Vector::value_type uni_char = *c_iter;
598     CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
599     if (char_iter == char_word_map_.end()) {
600       // A character was not found so there are no matching results: bail.
601       word_id_set.clear();
602       break;
603     }
604     WordIDSet& char_word_id_set(char_iter->second);
605     // It is possible for there to no longer be any words associated with
606     // a particular character. Give up in that case.
607     if (char_word_id_set.empty()) {
608       word_id_set.clear();
609       break;
610     }
611 
612     if (word_id_set.empty()) {
613       word_id_set = char_word_id_set;
614     } else {
615       WordIDSet old_word_id_set(word_id_set);
616       word_id_set.clear();
617       std::set_intersection(old_word_id_set.begin(),
618                             old_word_id_set.end(),
619                             char_word_id_set.begin(),
620                             char_word_id_set.end(),
621                             std::inserter(word_id_set,
622                             word_id_set.begin()));
623     }
624     // Add this new char/set instance to the cache.
625     term_char_word_set_cache_.push_back(TermCharWordSet(
626         uni_char, word_id_set, true));
627   }
628   return word_id_set;
629 }
630 
CachedResultsIndexForTerm(const InMemoryURLIndex::Char16Vector & uni_chars)631 size_t InMemoryURLIndex::CachedResultsIndexForTerm(
632     const InMemoryURLIndex::Char16Vector& uni_chars) {
633   TermCharWordSetVector::iterator t_iter = term_char_word_set_cache_.begin();
634   for (Char16Vector::const_iterator c_iter = uni_chars.begin();
635        (c_iter != uni_chars.end()) &&
636        (t_iter != term_char_word_set_cache_.end()) &&
637        (*c_iter == t_iter->char_);
638        ++c_iter, ++t_iter)
639     t_iter->used_ = true;  // Update the cache.
640   return t_iter - term_char_word_set_cache_.begin() - 1;
641 }
642 
643 // static
MatchTermInString(const string16 & term,const string16 & string,int term_num)644 TermMatches InMemoryURLIndex::MatchTermInString(const string16& term,
645                                                 const string16& string,
646                                                 int term_num) {
647   TermMatches matches;
648   for (size_t location = string.find(term); location != string16::npos;
649        location = string.find(term, location + 1))
650     matches.push_back(TermMatch(term_num, location, term.size()));
651   return matches;
652 }
653 
654 // static
SortAndDeoverlap(const TermMatches & matches)655 TermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
656   if (matches.empty())
657     return matches;
658   TermMatches sorted_matches = matches;
659   std::sort(sorted_matches.begin(), sorted_matches.end(),
660             MatchOffsetLess);
661   TermMatches clean_matches;
662   TermMatch last_match = sorted_matches[0];
663   clean_matches.push_back(last_match);
664   for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;
665        iter != sorted_matches.end(); ++iter) {
666     if (iter->offset >= last_match.offset + last_match.length) {
667       last_match = *iter;
668       clean_matches.push_back(last_match);
669     }
670   }
671   return clean_matches;
672 }
673 
674 // static
OffsetsFromTermMatches(const TermMatches & matches)675 std::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
676     const TermMatches& matches) {
677   std::vector<size_t> offsets;
678   for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
679     offsets.push_back(i->offset);
680   return offsets;
681 }
682 
683 // static
ReplaceOffsetsInTermMatches(const TermMatches & matches,const std::vector<size_t> & offsets)684 TermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
685     const TermMatches& matches,
686     const std::vector<size_t>& offsets) {
687   DCHECK_EQ(matches.size(), offsets.size());
688   TermMatches new_matches;
689   std::vector<size_t>::const_iterator offset_iter = offsets.begin();
690   for (TermMatches::const_iterator term_iter = matches.begin();
691        term_iter != matches.end(); ++term_iter, ++offset_iter) {
692     if (*offset_iter != string16::npos) {
693       TermMatch new_match(*term_iter);
694       new_match.offset = *offset_iter;
695       new_matches.push_back(new_match);
696     }
697   }
698   return new_matches;
699 }
700 
701 // static
ScoredMatchForURL(const URLRow & row,const String16Vector & terms)702 ScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
703     const URLRow& row,
704     const String16Vector& terms) {
705   ScoredHistoryMatch match(row);
706   GURL gurl = row.url();
707   if (!gurl.is_valid())
708     return match;
709 
710   // Figure out where each search term appears in the URL and/or page title
711   // so that we can score as well as provide autocomplete highlighting.
712   string16 url = l10n_util::ToLower(UTF8ToUTF16(gurl.spec()));
713   // Strip any 'http://' prefix before matching.
714   if (url_util::FindAndCompareScheme(url, chrome::kHttpScheme, NULL)) {
715     match.prefix_adjust = strlen(chrome::kHttpScheme) + 3;  // Allow for '://'.
716     url = url.substr(match.prefix_adjust);
717   }
718 
719   string16 title = l10n_util::ToLower(row.title());
720   int term_num = 0;
721   for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();
722        ++iter, ++term_num) {
723     string16 term = *iter;
724     TermMatches url_term_matches = MatchTermInString(term, url, term_num);
725     TermMatches title_term_matches = MatchTermInString(term, title, term_num);
726     if (url_term_matches.empty() && title_term_matches.empty())
727       return match;  // A term was not found in either URL or title - reject.
728     match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(),
729                              url_term_matches.end());
730     match.title_matches.insert(match.title_matches.end(),
731                                title_term_matches.begin(),
732                                title_term_matches.end());
733   }
734 
735   // Sort matches by offset and eliminate any which overlap.
736   match.url_matches = SortAndDeoverlap(match.url_matches);
737   match.title_matches = SortAndDeoverlap(match.title_matches);
738 
739   // Get partial scores based on term matching. Note that the score for
740   // each of the URL and title are adjusted by the fraction of the
741   // terms appearing in each.
742   int url_score = ScoreComponentForMatches(match.url_matches, url.size()) *
743       match.url_matches.size() / terms.size();
744   int title_score =
745       ScoreComponentForMatches(match.title_matches, title.size()) *
746       static_cast<int>(match.title_matches.size()) /
747       static_cast<int>(terms.size());
748   // Arbitrarily pick the best.
749   // TODO(mrossetti): It might make sense that a term which appears in both the
750   // URL and the Title should boost the score a bit.
751   int term_score = std::max(url_score, title_score);
752   if (term_score == 0)
753     return match;
754 
755   // Factor in recency of visit, visit count and typed count attributes of the
756   // URLRow.
757   const int kDaysAgoLevel[] = { 0, 10, 20, 30 };
758   int score = ScoreForValue((base::Time::Now() -
759       row.last_visit()).InDays(), kDaysAgoLevel);
760   const int kVisitCountLevel[] = { 30, 10, 5, 3 };
761   int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);
762   const int kTypedCountLevel[] = { 10, 5, 3, 1 };
763   int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel);
764 
765   // Determine how many of the factors comprising the final score are
766   // significant by summing the relative factors for each and subtracting how
767   // many will be 'discarded' even if they are low.
768   const int kVisitCountMultiplier = 2;
769   const int kTypedCountMultiplier = 3;
770   const int kSignificantFactors =
771       kVisitCountMultiplier +  // Visit count factor plus
772       kTypedCountMultiplier +  // typed count factor plus
773       2 -                      // one each for string match and last visit
774       2;                       // minus 2 insignificant factors.
775   // The following, in effect, discards up to |kSignificantFactors| low scoring
776   // elements which contribute little to the score but which can inordinately
777   // drag down an otherwise good score.
778   match.raw_score = std::min(kScoreRank[0], (term_score + score +
779       (visit_count_value * kVisitCountMultiplier) + (typed_count_value *
780       kTypedCountMultiplier)) / kSignificantFactors);
781 
782   return match;
783 }
784 
ScoreComponentForMatches(const TermMatches & matches,size_t max_length)785 int InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches,
786                                                size_t max_length) {
787   // TODO(mrossetti): This is good enough for now but must be fine-tuned.
788   if (matches.empty())
789     return 0;
790 
791   // Score component for whether the input terms (if more than one) were found
792   // in the same order in the match.  Start with kOrderMaxValue points divided
793   // equally among (number of terms - 1); then discount each of those terms that
794   // is out-of-order in the match.
795   const int kOrderMaxValue = 250;
796   int order_value = kOrderMaxValue;
797   if (matches.size() > 1) {
798     int max_possible_out_of_order = matches.size() - 1;
799     int out_of_order = 0;
800     for (size_t i = 1; i < matches.size(); ++i) {
801       if (matches[i - 1].term_num > matches[i].term_num)
802         ++out_of_order;
803     }
804     order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /
805         max_possible_out_of_order;
806   }
807 
808   // Score component for how early in the match string the first search term
809   // appears.  Start with kStartMaxValue points and discount by
810   // 1/kMaxSignificantStart points for each character later than the first at
811   // which the term begins. No points are earned if the start of the match
812   // occurs at or after kMaxSignificantStart.
813   const size_t kMaxSignificantStart = 20;
814   const int kStartMaxValue = 250;
815   int start_value = (kMaxSignificantStart -
816       std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /
817       kMaxSignificantStart;
818 
819   // Score component for how much of the matched string the input terms cover.
820   // kCompleteMaxValue points times the fraction of the URL/page title string
821   // that was matched.
822   size_t term_length_total = std::accumulate(matches.begin(), matches.end(),
823                                              0, AccumulateMatchLength);
824   const int kCompleteMaxValue = 500;
825   int complete_value = term_length_total * kCompleteMaxValue / max_length;
826 
827   int raw_score = order_value + start_value + complete_value;
828   const int kTermScoreLevel[] = { 1000, 650, 500, 200 };
829 
830   // Scale the sum of the three components above into a single score component
831   // on the same scale as that used in ScoredMatchForURL().
832   return ScoreForValue(raw_score, kTermScoreLevel);
833 }
834 
AddHistoryMatch(const InMemoryURLIndex & index,const String16Vector & lower_terms)835 InMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
836     const InMemoryURLIndex& index,
837     const String16Vector& lower_terms)
838   : index_(index),
839     lower_terms_(lower_terms) {
840 }
841 
~AddHistoryMatch()842 InMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
843 
operator ()(const InMemoryURLIndex::HistoryID history_id)844 void InMemoryURLIndex::AddHistoryMatch::operator()(
845     const InMemoryURLIndex::HistoryID history_id) {
846   HistoryInfoMap::const_iterator hist_pos =
847       index_.history_info_map_.find(history_id);
848   // Note that a history_id may be present in the word_id_history_map_ yet not
849   // be found in the history_info_map_. This occurs when an item has been
850   // deleted by the user or the item no longer qualifies as a quick result.
851   if (hist_pos != index_.history_info_map_.end()) {
852     const URLRow& hist_item = hist_pos->second;
853     ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));
854     if (match.raw_score > 0)
855       scored_matches_.push_back(match);
856   }
857 }
858 
GetCacheFilePath(FilePath * file_path)859 bool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) {
860   if (history_dir_.empty())
861     return false;
862   *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));
863   return true;
864 }
865 
SavePrivateData(InMemoryURLIndexCacheItem * cache) const866 void InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
867   DCHECK(cache);
868   cache->set_timestamp(base::Time::Now().ToInternalValue());
869   cache->set_history_item_count(history_item_count_);
870   SaveWordList(cache);
871   SaveWordMap(cache);
872   SaveCharWordMap(cache);
873   SaveWordIDHistoryMap(cache);
874   SaveHistoryInfoMap(cache);
875 }
876 
RestorePrivateData(const InMemoryURLIndexCacheItem & cache)877 bool InMemoryURLIndex::RestorePrivateData(
878     const InMemoryURLIndexCacheItem& cache) {
879   last_saved_ = base::Time::FromInternalValue(cache.timestamp());
880   history_item_count_ = cache.history_item_count();
881   return (history_item_count_ == 0) || (RestoreWordList(cache) &&
882       RestoreWordMap(cache) && RestoreCharWordMap(cache) &&
883       RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));
884 }
885 
886 
SaveWordList(InMemoryURLIndexCacheItem * cache) const887 void InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
888   if (word_list_.empty())
889     return;
890   WordListItem* list_item = cache->mutable_word_list();
891   list_item->set_word_count(word_list_.size());
892   for (String16Vector::const_iterator iter = word_list_.begin();
893        iter != word_list_.end(); ++iter)
894     list_item->add_word(UTF16ToUTF8(*iter));
895 }
896 
RestoreWordList(const InMemoryURLIndexCacheItem & cache)897 bool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {
898   if (!cache.has_word_list())
899     return false;
900   const WordListItem& list_item(cache.word_list());
901   uint32 expected_item_count = list_item.word_count();
902   uint32 actual_item_count = list_item.word_size();
903   if (actual_item_count == 0 || actual_item_count != expected_item_count)
904     return false;
905   const RepeatedPtrField<std::string>& words(list_item.word());
906   for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
907        iter != words.end(); ++iter)
908     word_list_.push_back(UTF8ToUTF16(*iter));
909   return true;
910 }
911 
SaveWordMap(InMemoryURLIndexCacheItem * cache) const912 void InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
913   if (word_map_.empty())
914     return;
915   WordMapItem* map_item = cache->mutable_word_map();
916   map_item->set_item_count(word_map_.size());
917   for (WordMap::const_iterator iter = word_map_.begin();
918        iter != word_map_.end(); ++iter) {
919     WordMapEntry* map_entry = map_item->add_word_map_entry();
920     map_entry->set_word(UTF16ToUTF8(iter->first));
921     map_entry->set_word_id(iter->second);
922   }
923 }
924 
RestoreWordMap(const InMemoryURLIndexCacheItem & cache)925 bool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {
926   if (!cache.has_word_map())
927     return false;
928   const WordMapItem& list_item(cache.word_map());
929   uint32 expected_item_count = list_item.item_count();
930   uint32 actual_item_count = list_item.word_map_entry_size();
931   if (actual_item_count == 0 || actual_item_count != expected_item_count)
932     return false;
933   const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
934   for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
935        iter != entries.end(); ++iter)
936     word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
937   return true;
938 }
939 
SaveCharWordMap(InMemoryURLIndexCacheItem * cache) const940 void InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
941   if (char_word_map_.empty())
942     return;
943   CharWordMapItem* map_item = cache->mutable_char_word_map();
944   map_item->set_item_count(char_word_map_.size());
945   for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
946        iter != char_word_map_.end(); ++iter) {
947     CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
948     map_entry->set_char_16(iter->first);
949     const WordIDSet& word_id_set(iter->second);
950     map_entry->set_item_count(word_id_set.size());
951     for (WordIDSet::const_iterator set_iter = word_id_set.begin();
952          set_iter != word_id_set.end(); ++set_iter)
953       map_entry->add_word_id(*set_iter);
954   }
955 }
956 
RestoreCharWordMap(const InMemoryURLIndexCacheItem & cache)957 bool InMemoryURLIndex::RestoreCharWordMap(
958     const InMemoryURLIndexCacheItem& cache) {
959   if (!cache.has_char_word_map())
960     return false;
961   const CharWordMapItem& list_item(cache.char_word_map());
962   uint32 expected_item_count = list_item.item_count();
963   uint32 actual_item_count = list_item.char_word_map_entry_size();
964   if (actual_item_count == 0 || actual_item_count != expected_item_count)
965     return false;
966   const RepeatedPtrField<CharWordMapEntry>&
967       entries(list_item.char_word_map_entry());
968   for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =
969        entries.begin(); iter != entries.end(); ++iter) {
970     expected_item_count = iter->item_count();
971     actual_item_count = iter->word_id_size();
972     if (actual_item_count == 0 || actual_item_count != expected_item_count)
973       return false;
974     char16 uni_char = static_cast<char16>(iter->char_16());
975     WordIDSet word_id_set;
976     const RepeatedField<int32>& word_ids(iter->word_id());
977     for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
978          jiter != word_ids.end(); ++jiter)
979       word_id_set.insert(*jiter);
980     char_word_map_[uni_char] = word_id_set;
981   }
982   return true;
983 }
984 
SaveWordIDHistoryMap(InMemoryURLIndexCacheItem * cache) const985 void InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
986     const {
987   if (word_id_history_map_.empty())
988     return;
989   WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
990   map_item->set_item_count(word_id_history_map_.size());
991   for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
992        iter != word_id_history_map_.end(); ++iter) {
993     WordIDHistoryMapEntry* map_entry =
994         map_item->add_word_id_history_map_entry();
995     map_entry->set_word_id(iter->first);
996     const HistoryIDSet& history_id_set(iter->second);
997     map_entry->set_item_count(history_id_set.size());
998     for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
999          set_iter != history_id_set.end(); ++set_iter)
1000       map_entry->add_history_id(*set_iter);
1001   }
1002 }
1003 
RestoreWordIDHistoryMap(const InMemoryURLIndexCacheItem & cache)1004 bool InMemoryURLIndex::RestoreWordIDHistoryMap(
1005     const InMemoryURLIndexCacheItem& cache) {
1006   if (!cache.has_word_id_history_map())
1007     return false;
1008   const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1009   uint32 expected_item_count = list_item.item_count();
1010   uint32 actual_item_count = list_item.word_id_history_map_entry_size();
1011   if (actual_item_count == 0 || actual_item_count != expected_item_count)
1012     return false;
1013   const RepeatedPtrField<WordIDHistoryMapEntry>&
1014       entries(list_item.word_id_history_map_entry());
1015   for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
1016        entries.begin(); iter != entries.end(); ++iter) {
1017     expected_item_count = iter->item_count();
1018     actual_item_count = iter->history_id_size();
1019     if (actual_item_count == 0 || actual_item_count != expected_item_count)
1020       return false;
1021     WordID word_id = iter->word_id();
1022     HistoryIDSet history_id_set;
1023     const RepeatedField<int64>& history_ids(iter->history_id());
1024     for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
1025          jiter != history_ids.end(); ++jiter)
1026       history_id_set.insert(*jiter);
1027     word_id_history_map_[word_id] = history_id_set;
1028   }
1029   return true;
1030 }
1031 
SaveHistoryInfoMap(InMemoryURLIndexCacheItem * cache) const1032 void InMemoryURLIndex::SaveHistoryInfoMap(
1033     InMemoryURLIndexCacheItem* cache) const {
1034   if (history_info_map_.empty())
1035     return;
1036   HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1037   map_item->set_item_count(history_info_map_.size());
1038   for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1039        iter != history_info_map_.end(); ++iter) {
1040     HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1041     map_entry->set_history_id(iter->first);
1042     const URLRow& url_row(iter->second);
1043     // Note: We only save information that contributes to the index so there
1044     // is no need to save term_char_word_set_cache_ (not persistent),
1045     // languages_, etc.
1046     map_entry->set_visit_count(url_row.visit_count());
1047     map_entry->set_typed_count(url_row.typed_count());
1048     map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1049     map_entry->set_url(url_row.url().spec());
1050     map_entry->set_title(UTF16ToUTF8(url_row.title()));
1051   }
1052 }
1053 
RestoreHistoryInfoMap(const InMemoryURLIndexCacheItem & cache)1054 bool InMemoryURLIndex::RestoreHistoryInfoMap(
1055     const InMemoryURLIndexCacheItem& cache) {
1056   if (!cache.has_history_info_map())
1057     return false;
1058   const HistoryInfoMapItem& list_item(cache.history_info_map());
1059   uint32 expected_item_count = list_item.item_count();
1060   uint32 actual_item_count = list_item.history_info_map_entry_size();
1061   if (actual_item_count == 0 || actual_item_count != expected_item_count)
1062     return false;
1063   const RepeatedPtrField<HistoryInfoMapEntry>&
1064       entries(list_item.history_info_map_entry());
1065   for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
1066        entries.begin(); iter != entries.end(); ++iter) {
1067     HistoryID history_id = iter->history_id();
1068     GURL url(iter->url());
1069     URLRow url_row(url, history_id);
1070     url_row.set_visit_count(iter->visit_count());
1071     url_row.set_typed_count(iter->typed_count());
1072     url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
1073     if (iter->has_title()) {
1074       string16 title(UTF8ToUTF16(iter->title()));
1075       url_row.set_title(title);
1076     }
1077     history_info_map_[history_id] = url_row;
1078   }
1079   return true;
1080 }
1081 
1082 }  // namespace history
1083