• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/sync/glue/typed_url_model_associator.h"
6 
7 #include <algorithm>
8 #include <set>
9 
10 #include "base/location.h"
11 #include "base/logging.h"
12 #include "base/metrics/histogram.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "chrome/browser/history/history_backend.h"
15 #include "chrome/browser/sync/profile_sync_service.h"
16 #include "content/public/browser/browser_thread.h"
17 #include "net/base/net_util.h"
18 #include "sync/api/sync_error.h"
19 #include "sync/internal_api/public/read_node.h"
20 #include "sync/internal_api/public/read_transaction.h"
21 #include "sync/internal_api/public/write_node.h"
22 #include "sync/internal_api/public/write_transaction.h"
23 #include "sync/protocol/typed_url_specifics.pb.h"
24 
25 using content::BrowserThread;
26 
27 namespace browser_sync {
28 
// Upper bound on the number of visits written into a single sync node. The
// server backend cannot cope with arbitrarily large nodes, so the visit array
// is capped at this size.
static const int kMaxTypedUrlVisits = 100;

// Upper bound on the number of visits read from the history DB for one typed
// URL. The DB itself imposes no limit, and fetching everything has caused
// out-of-memory crashes (http://crbug.com/89793). This is intentionally not
// the same value as kMaxTypedUrlVisits: some of the fetched visits may be
// RELOAD visits, which get stripped.
static const int kMaxVisitsToFetch = 1000;
39 
CheckVisitOrdering(const history::VisitVector & visits)40 static bool CheckVisitOrdering(const history::VisitVector& visits) {
41   int64 previous_visit_time = 0;
42   for (history::VisitVector::const_iterator visit = visits.begin();
43        visit != visits.end(); ++visit) {
44     if (visit != visits.begin()) {
45       // We allow duplicate visits here - they shouldn't really be allowed, but
46       // they still seem to show up sometimes and we haven't figured out the
47       // source, so we just log an error instead of failing an assertion.
48       // (http://crbug.com/91473).
49       if (previous_visit_time == visit->visit_time.ToInternalValue())
50         DVLOG(1) << "Duplicate visit time encountered";
51       else if (previous_visit_time > visit->visit_time.ToInternalValue())
52         return false;
53     }
54 
55     previous_visit_time = visit->visit_time.ToInternalValue();
56   }
57   return true;
58 }
59 
TypedUrlModelAssociator(ProfileSyncService * sync_service,history::HistoryBackend * history_backend,DataTypeErrorHandler * error_handler)60 TypedUrlModelAssociator::TypedUrlModelAssociator(
61     ProfileSyncService* sync_service,
62     history::HistoryBackend* history_backend,
63     DataTypeErrorHandler* error_handler)
64     : sync_service_(sync_service),
65       history_backend_(history_backend),
66       expected_loop_(base::MessageLoop::current()),
67       abort_requested_(false),
68       error_handler_(error_handler),
69       num_db_accesses_(0),
70       num_db_errors_(0) {
71   DCHECK(sync_service_);
72   // history_backend_ may be null for unit tests (since it's not mockable).
73   DCHECK(!BrowserThread::CurrentlyOn(BrowserThread::UI));
74 }
75 
~TypedUrlModelAssociator()76 TypedUrlModelAssociator::~TypedUrlModelAssociator() {}
77 
78 
FixupURLAndGetVisits(history::URLRow * url,history::VisitVector * visits)79 bool TypedUrlModelAssociator::FixupURLAndGetVisits(
80     history::URLRow* url,
81     history::VisitVector* visits) {
82   ++num_db_accesses_;
83   CHECK(history_backend_);
84   if (!history_backend_->GetMostRecentVisitsForURL(
85           url->id(), kMaxVisitsToFetch, visits)) {
86     ++num_db_errors_;
87     return false;
88   }
89 
90   // Sometimes (due to a bug elsewhere in the history or sync code, or due to
91   // a crash between adding a URL to the history database and updating the
92   // visit DB) the visit vector for a URL can be empty. If this happens, just
93   // create a new visit whose timestamp is the same as the last_visit time.
94   // This is a workaround for http://crbug.com/84258.
95   if (visits->empty()) {
96     DVLOG(1) << "Found empty visits for URL: " << url->url();
97 
98     if (url->last_visit().is_null()) {
99       // If modified URL is bookmarked, history backend treats it as modified
100       // even if all its visits are deleted. Return false to stop further
101       // processing because sync expects valid visit time for modified entry.
102       return false;
103     }
104 
105     history::VisitRow visit(
106         url->id(), url->last_visit(), 0, content::PAGE_TRANSITION_TYPED, 0);
107     visits->push_back(visit);
108   }
109 
110   // GetMostRecentVisitsForURL() returns the data in the opposite order that
111   // we need it, so reverse it.
112   std::reverse(visits->begin(), visits->end());
113 
114   // Sometimes, the last_visit field in the URL doesn't match the timestamp of
115   // the last visit in our visit array (they come from different tables, so
116   // crashes/bugs can cause them to mismatch), so just set it here.
117   url->set_last_visit(visits->back().visit_time);
118   DCHECK(CheckVisitOrdering(*visits));
119   return true;
120 }
121 
ShouldIgnoreUrl(const GURL & url)122 bool TypedUrlModelAssociator::ShouldIgnoreUrl(const GURL& url) {
123   // Ignore empty URLs. Not sure how this can happen (maybe import from other
124   // busted browsers, or misuse of the history API, or just plain bugs) but we
125   // can't deal with them.
126   if (url.spec().empty())
127     return true;
128 
129   // Ignore local file URLs.
130   if (url.SchemeIsFile())
131     return true;
132 
133   // Ignore localhost URLs.
134   if (net::IsLocalhost(url.host()))
135     return true;
136 
137   return false;
138 }
139 
ShouldIgnoreVisits(const history::VisitVector & visits)140 bool TypedUrlModelAssociator::ShouldIgnoreVisits(
141     const history::VisitVector& visits) {
142   // We ignore URLs that were imported, but have never been visited by
143   // chromium.
144   static const int kLastImportedSource = history::SOURCE_EXTENSION;
145   history::VisitSourceMap map;
146   if (!history_backend_->GetVisitsSource(visits, &map))
147     return false;  // If we can't read the visit, assume it's not imported.
148 
149   // Walk the list of visits and look for a non-imported item.
150   for (history::VisitVector::const_iterator it = visits.begin();
151        it != visits.end(); ++it) {
152     if (map.count(it->visit_id) == 0 ||
153         map[it->visit_id] <= kLastImportedSource) {
154       return false;
155     }
156   }
157   // We only saw imported visits, so tell the caller to ignore them.
158   return true;
159 }
160 
AssociateModels(syncer::SyncMergeResult * local_merge_result,syncer::SyncMergeResult * syncer_merge_result)161 syncer::SyncError TypedUrlModelAssociator::AssociateModels(
162     syncer::SyncMergeResult* local_merge_result,
163     syncer::SyncMergeResult* syncer_merge_result) {
164   ClearErrorStats();
165   syncer::SyncError error = DoAssociateModels();
166   UMA_HISTOGRAM_PERCENTAGE("Sync.TypedUrlModelAssociationErrors",
167                            GetErrorPercentage());
168   ClearErrorStats();
169   return error;
170 }
171 
ClearErrorStats()172 void TypedUrlModelAssociator::ClearErrorStats() {
173   num_db_accesses_ = 0;
174   num_db_errors_ = 0;
175 }
176 
GetErrorPercentage() const177 int TypedUrlModelAssociator::GetErrorPercentage() const {
178   return num_db_accesses_ ? (100 * num_db_errors_ / num_db_accesses_) : 0;
179 }
180 
DoAssociateModels()181 syncer::SyncError TypedUrlModelAssociator::DoAssociateModels() {
182   DVLOG(1) << "Associating TypedUrl Models";
183   DCHECK(expected_loop_ == base::MessageLoop::current());
184 
185   history::URLRows typed_urls;
186   ++num_db_accesses_;
187   bool query_succeeded =
188       history_backend_ && history_backend_->GetAllTypedURLs(&typed_urls);
189 
190   history::URLRows new_urls;
191   TypedUrlVisitVector new_visits;
192   TypedUrlUpdateVector updated_urls;
193   {
194     base::AutoLock au(abort_lock_);
195     if (abort_requested_) {
196       return syncer::SyncError(FROM_HERE,
197                                syncer::SyncError::DATATYPE_ERROR,
198                                "Association was aborted.",
199                                model_type());
200     }
201 
202     // Must lock and check first to make sure |error_handler_| is valid.
203     if (!query_succeeded) {
204       ++num_db_errors_;
205       return error_handler_->CreateAndUploadError(
206           FROM_HERE,
207           "Could not get the typed_url entries.",
208           model_type());
209     }
210 
211     // Get all the visits.
212     std::map<history::URLID, history::VisitVector> visit_vectors;
213     for (history::URLRows::iterator ix = typed_urls.begin();
214          ix != typed_urls.end();) {
215       DCHECK_EQ(0U, visit_vectors.count(ix->id()));
216       if (!FixupURLAndGetVisits(&(*ix), &(visit_vectors[ix->id()])) ||
217           ShouldIgnoreUrl(ix->url()) ||
218           ShouldIgnoreVisits(visit_vectors[ix->id()])) {
219         // Ignore this URL if we couldn't load the visits or if there's some
220         // other problem with it (it was empty, or imported and never visited).
221         ix = typed_urls.erase(ix);
222       } else {
223         ++ix;
224       }
225     }
226 
227     syncer::WriteTransaction trans(FROM_HERE, sync_service_->GetUserShare());
228     syncer::ReadNode typed_url_root(&trans);
229     if (typed_url_root.InitTypeRoot(syncer::TYPED_URLS) !=
230         syncer::BaseNode::INIT_OK) {
231       return error_handler_->CreateAndUploadError(
232           FROM_HERE,
233           "Server did not create the top-level typed_url node. We "
234           "might be running against an out-of-date server.",
235           model_type());
236     }
237 
238     std::set<std::string> current_urls;
239     for (history::URLRows::iterator ix = typed_urls.begin();
240          ix != typed_urls.end(); ++ix) {
241       std::string tag = ix->url().spec();
242       // Empty URLs should be filtered out by ShouldIgnoreUrl() previously.
243       DCHECK(!tag.empty());
244       history::VisitVector& visits = visit_vectors[ix->id()];
245 
246       syncer::ReadNode node(&trans);
247       if (node.InitByClientTagLookup(syncer::TYPED_URLS, tag) ==
248               syncer::BaseNode::INIT_OK) {
249         // Same URL exists in sync data and in history data - compare the
250         // entries to see if there's any difference.
251         sync_pb::TypedUrlSpecifics typed_url(
252             FilterExpiredVisits(node.GetTypedUrlSpecifics()));
253         DCHECK_EQ(tag, typed_url.url());
254 
255         // Initialize fields in |new_url| to the same values as the fields in
256         // the existing URLRow in the history DB. This is needed because we
257         // overwrite the existing value below in WriteToHistoryBackend(), but
258         // some of the values in that structure are not synced (like
259         // typed_count).
260         history::URLRow new_url(*ix);
261 
262         std::vector<history::VisitInfo> added_visits;
263         MergeResult difference =
264             MergeUrls(typed_url, *ix, &visits, &new_url, &added_visits);
265         if (difference & DIFF_UPDATE_NODE) {
266           syncer::WriteNode write_node(&trans);
267           if (write_node.InitByClientTagLookup(syncer::TYPED_URLS, tag) !=
268                   syncer::BaseNode::INIT_OK) {
269             return error_handler_->CreateAndUploadError(
270                 FROM_HERE,
271                 "Failed to edit typed_url sync node.",
272                 model_type());
273           }
274           // We don't want to resurrect old visits that have been aged out by
275           // other clients, so remove all visits that are older than the
276           // earliest existing visit in the sync node.
277           if (typed_url.visits_size() > 0) {
278             base::Time earliest_visit =
279                 base::Time::FromInternalValue(typed_url.visits(0));
280             for (history::VisitVector::iterator it = visits.begin();
281                  it != visits.end() && it->visit_time < earliest_visit; ) {
282               it = visits.erase(it);
283             }
284             // Should never be possible to delete all the items, since the
285             // visit vector contains all the items in typed_url.visits.
286             DCHECK(visits.size() > 0);
287           }
288           DCHECK_EQ(new_url.last_visit().ToInternalValue(),
289                     visits.back().visit_time.ToInternalValue());
290           WriteToSyncNode(new_url, visits, &write_node);
291         }
292         if (difference & DIFF_LOCAL_ROW_CHANGED) {
293           updated_urls.push_back(
294               std::pair<history::URLID, history::URLRow>(ix->id(), new_url));
295         }
296         if (difference & DIFF_LOCAL_VISITS_ADDED) {
297           new_visits.push_back(
298               std::pair<GURL, std::vector<history::VisitInfo> >(ix->url(),
299                                                                 added_visits));
300         }
301       } else {
302         // Sync has never seen this URL before.
303         syncer::WriteNode node(&trans);
304         syncer::WriteNode::InitUniqueByCreationResult result =
305             node.InitUniqueByCreation(syncer::TYPED_URLS,
306                                       typed_url_root, tag);
307         if (result != syncer::WriteNode::INIT_SUCCESS) {
308           return error_handler_->CreateAndUploadError(
309               FROM_HERE,
310               "Failed to create typed_url sync node: " + tag,
311               model_type());
312         }
313 
314         node.SetTitle(tag);
315         WriteToSyncNode(*ix, visits, &node);
316       }
317 
318       current_urls.insert(tag);
319     }
320 
321     // Now walk the sync nodes and detect any URLs that exist there, but not in
322     // the history DB, so we can add them to our local history DB.
323     std::vector<int64> obsolete_nodes;
324     int64 sync_child_id = typed_url_root.GetFirstChildId();
325     while (sync_child_id != syncer::kInvalidId) {
326       syncer::ReadNode sync_child_node(&trans);
327       if (sync_child_node.InitByIdLookup(sync_child_id) !=
328               syncer::BaseNode::INIT_OK) {
329         return error_handler_->CreateAndUploadError(
330             FROM_HERE,
331             "Failed to fetch child node.",
332             model_type());
333       }
334       const sync_pb::TypedUrlSpecifics& typed_url(
335           sync_child_node.GetTypedUrlSpecifics());
336 
337       sync_child_id = sync_child_node.GetSuccessorId();
338 
339       // Ignore old sync nodes that don't have any transition data stored with
340       // them, or transition data that does not match the visit data (will be
341       // deleted below).
342       if (typed_url.visit_transitions_size() == 0 ||
343           typed_url.visit_transitions_size() != typed_url.visits_size()) {
344         // Generate a debug assertion to help track down http://crbug.com/91473,
345         // even though we gracefully handle this case by throwing away this
346         // node.
347         DCHECK_EQ(typed_url.visits_size(), typed_url.visit_transitions_size());
348         DVLOG(1) << "Deleting obsolete sync node with no visit "
349                  << "transition info.";
350         obsolete_nodes.push_back(sync_child_node.GetId());
351         continue;
352       }
353 
354       if (typed_url.url().empty()) {
355         DVLOG(1) << "Ignoring empty URL in sync DB";
356         continue;
357       }
358 
359       // Now, get rid of the expired visits, and if there are no un-expired
360       // visits left, just ignore this node.
361       sync_pb::TypedUrlSpecifics filtered_url = FilterExpiredVisits(typed_url);
362       if (filtered_url.visits_size() == 0) {
363         DVLOG(1) << "Ignoring expired URL in sync DB: " << filtered_url.url();
364         continue;
365       }
366 
367       if (current_urls.find(filtered_url.url()) == current_urls.end()) {
368         // Update the local DB from the sync DB. Since we are doing our
369         // initial model association, we don't want to remove any of the
370         // existing visits (pass NULL as |visits_to_remove|).
371         UpdateFromSyncDB(filtered_url,
372                          &new_visits,
373                          NULL,
374                          &updated_urls,
375                          &new_urls);
376       }
377     }
378 
379     // If we encountered any obsolete nodes, remove them so they don't hang
380     // around and confuse people looking at the sync node browser.
381     if (!obsolete_nodes.empty()) {
382       for (std::vector<int64>::const_iterator it = obsolete_nodes.begin();
383            it != obsolete_nodes.end();
384            ++it) {
385         syncer::WriteNode sync_node(&trans);
386         if (sync_node.InitByIdLookup(*it) != syncer::BaseNode::INIT_OK) {
387           return error_handler_->CreateAndUploadError(
388               FROM_HERE,
389               "Failed to fetch obsolete node.",
390               model_type());
391         }
392         sync_node.Tombstone();
393       }
394     }
395   }
396 
397   // Since we're on the history thread, we don't have to worry about updating
398   // the history database after closing the write transaction, since
399   // this is the only thread that writes to the database.  We also don't have
400   // to worry about the sync model getting out of sync, because changes are
401   // propagated to the ChangeProcessor on this thread.
402   WriteToHistoryBackend(&new_urls, &updated_urls, &new_visits, NULL);
403   return syncer::SyncError();
404 }
405 
UpdateFromSyncDB(const sync_pb::TypedUrlSpecifics & typed_url,TypedUrlVisitVector * visits_to_add,history::VisitVector * visits_to_remove,TypedUrlUpdateVector * updated_urls,history::URLRows * new_urls)406 void TypedUrlModelAssociator::UpdateFromSyncDB(
407     const sync_pb::TypedUrlSpecifics& typed_url,
408     TypedUrlVisitVector* visits_to_add,
409     history::VisitVector* visits_to_remove,
410     TypedUrlUpdateVector* updated_urls,
411     history::URLRows* new_urls) {
412   history::URLRow new_url(GURL(typed_url.url()));
413   history::VisitVector existing_visits;
414   bool existing_url = history_backend_->GetURL(new_url.url(), &new_url);
415   if (existing_url) {
416     // This URL already exists locally - fetch the visits so we can
417     // merge them below.
418     if (!FixupURLAndGetVisits(&new_url, &existing_visits)) {
419       // Couldn't load the visits for this URL due to some kind of DB error.
420       // Don't bother writing this URL to the history DB (if we ignore the
421       // error and continue, we might end up duplicating existing visits).
422       DLOG(ERROR) << "Could not load visits for url: " << new_url.url();
423       return;
424     }
425   }
426   visits_to_add->push_back(std::pair<GURL, std::vector<history::VisitInfo> >(
427       new_url.url(), std::vector<history::VisitInfo>()));
428 
429   // Update the URL with information from the typed URL.
430   UpdateURLRowFromTypedUrlSpecifics(typed_url, &new_url);
431 
432   // Figure out which visits we need to add.
433   DiffVisits(existing_visits, typed_url, &visits_to_add->back().second,
434              visits_to_remove);
435 
436   if (existing_url) {
437     updated_urls->push_back(
438         std::pair<history::URLID, history::URLRow>(new_url.id(), new_url));
439   } else {
440     new_urls->push_back(new_url);
441   }
442 }
443 
// Returns a copy of |source| whose visits / visit_transitions arrays contain
// only the entries for which the history backend does not report the visit
// time as expired. All other fields are copied unchanged.
//
// If |source| carries fewer transitions than visits, the visits cannot be
// paired with transitions; the copy is then returned with no visits at all
// rather than indexing past the end of the transition array.
sync_pb::TypedUrlSpecifics TypedUrlModelAssociator::FilterExpiredVisits(
    const sync_pb::TypedUrlSpecifics& source) {
  // Make a copy of the source, then regenerate the visits.
  sync_pb::TypedUrlSpecifics specifics(source);
  specifics.clear_visits();
  specifics.clear_visit_transitions();

  // Not every caller validates the node first, so guard against a transition
  // array that is too short to be indexed alongside the visits.
  if (source.visit_transitions_size() < source.visits_size()) {
    DVLOG(1) << "Dropping visits from sync node with missing visit "
             << "transition info.";
    return specifics;
  }

  for (int i = 0; i < source.visits_size(); ++i) {
    base::Time time = base::Time::FromInternalValue(source.visits(i));
    if (!history_backend_->IsExpiredVisitTime(time)) {
      specifics.add_visits(source.visits(i));
      specifics.add_visit_transitions(source.visit_transitions(i));
    }
  }
  DCHECK(specifics.visits_size() == specifics.visit_transitions_size());
  return specifics;
}
460 
DeleteAllNodes(syncer::WriteTransaction * trans)461 bool TypedUrlModelAssociator::DeleteAllNodes(
462     syncer::WriteTransaction* trans) {
463   DCHECK(expected_loop_ == base::MessageLoop::current());
464 
465   // Just walk through all our child nodes and delete them.
466   syncer::ReadNode typed_url_root(trans);
467   if (typed_url_root.InitTypeRoot(syncer::TYPED_URLS) !=
468           syncer::BaseNode::INIT_OK) {
469     LOG(ERROR) << "Could not lookup root node";
470     return false;
471   }
472   int64 sync_child_id = typed_url_root.GetFirstChildId();
473   while (sync_child_id != syncer::kInvalidId) {
474     syncer::WriteNode sync_child_node(trans);
475     if (sync_child_node.InitByIdLookup(sync_child_id) !=
476             syncer::BaseNode::INIT_OK) {
477       LOG(ERROR) << "Typed url node lookup failed.";
478       return false;
479     }
480     sync_child_id = sync_child_node.GetSuccessorId();
481     sync_child_node.Tombstone();
482   }
483   return true;
484 }
485 
DisassociateModels()486 syncer::SyncError TypedUrlModelAssociator::DisassociateModels() {
487   return syncer::SyncError();
488 }
489 
AbortAssociation()490 void TypedUrlModelAssociator::AbortAssociation() {
491   base::AutoLock lock(abort_lock_);
492   abort_requested_ = true;
493 }
494 
SyncModelHasUserCreatedNodes(bool * has_nodes)495 bool TypedUrlModelAssociator::SyncModelHasUserCreatedNodes(bool* has_nodes) {
496   DCHECK(has_nodes);
497   *has_nodes = false;
498   syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
499   syncer::ReadNode sync_node(&trans);
500   if (sync_node.InitTypeRoot(syncer::TYPED_URLS) != syncer::BaseNode::INIT_OK) {
501     LOG(ERROR) << "Server did not create the top-level typed_url node. We "
502                << "might be running against an out-of-date server.";
503     return false;
504   }
505 
506   // The sync model has user created nodes if the typed_url folder has any
507   // children.
508   *has_nodes = sync_node.HasChildren();
509   return true;
510 }
511 
WriteToHistoryBackend(const history::URLRows * new_urls,const TypedUrlUpdateVector * updated_urls,const TypedUrlVisitVector * new_visits,const history::VisitVector * deleted_visits)512 void TypedUrlModelAssociator::WriteToHistoryBackend(
513     const history::URLRows* new_urls,
514     const TypedUrlUpdateVector* updated_urls,
515     const TypedUrlVisitVector* new_visits,
516     const history::VisitVector* deleted_visits) {
517   if (new_urls) {
518     history_backend_->AddPagesWithDetails(*new_urls, history::SOURCE_SYNCED);
519   }
520   if (updated_urls) {
521     for (TypedUrlUpdateVector::const_iterator url = updated_urls->begin();
522          url != updated_urls->end(); ++url) {
523       // This is an existing entry in the URL database. We don't verify the
524       // visit_count or typed_count values here, because either one (or both)
525       // could be zero in the case of bookmarks, or in the case of a URL
526       // transitioning from non-typed to typed as a result of this sync.
527       ++num_db_accesses_;
528       if (!history_backend_->UpdateURL(url->first, url->second)) {
529         // In the field we sometimes run into errors on specific URLs. It's OK
530         // to just continue on (we can try writing again on the next model
531         // association).
532         ++num_db_errors_;
533         DLOG(ERROR) << "Could not update page: " << url->second.url().spec();
534       }
535     }
536   }
537   if (new_visits) {
538     for (TypedUrlVisitVector::const_iterator visits = new_visits->begin();
539          visits != new_visits->end(); ++visits) {
540       // If there are no visits to add, just skip this.
541       if (visits->second.empty())
542         continue;
543       ++num_db_accesses_;
544       if (!history_backend_->AddVisits(visits->first, visits->second,
545                                        history::SOURCE_SYNCED)) {
546         ++num_db_errors_;
547         DLOG(ERROR) << "Could not add visits.";
548       }
549     }
550   }
551   if (deleted_visits) {
552     ++num_db_accesses_;
553     if (!history_backend_->RemoveVisits(*deleted_visits)) {
554       ++num_db_errors_;
555       DLOG(ERROR) << "Could not remove visits.";
556       // This is bad news, since it means we may end up resurrecting history
557       // entries on the next reload. It's unavoidable so we'll just keep on
558       // syncing.
559     }
560   }
561 }
562 
563 // static
MergeUrls(const sync_pb::TypedUrlSpecifics & node,const history::URLRow & url,history::VisitVector * visits,history::URLRow * new_url,std::vector<history::VisitInfo> * new_visits)564 TypedUrlModelAssociator::MergeResult TypedUrlModelAssociator::MergeUrls(
565     const sync_pb::TypedUrlSpecifics& node,
566     const history::URLRow& url,
567     history::VisitVector* visits,
568     history::URLRow* new_url,
569     std::vector<history::VisitInfo>* new_visits) {
570   DCHECK(new_url);
571   DCHECK(!node.url().compare(url.url().spec()));
572   DCHECK(!node.url().compare(new_url->url().spec()));
573   DCHECK(visits->size());
574   CHECK_EQ(node.visits_size(), node.visit_transitions_size());
575 
576   // If we have an old-format node (before we added the visits and
577   // visit_transitions arrays to the protobuf) or else the node only contained
578   // expired visits, so just overwrite it with our local history data.
579   if (node.visits_size() == 0)
580     return DIFF_UPDATE_NODE;
581 
582   // Convert these values only once.
583   base::string16 node_title(base::UTF8ToUTF16(node.title()));
584   base::Time node_last_visit = base::Time::FromInternalValue(
585       node.visits(node.visits_size() - 1));
586 
587   // This is a bitfield representing what we'll need to update with the output
588   // value.
589   MergeResult different = DIFF_NONE;
590 
591   // Check if the non-incremented values changed.
592   if ((node_title.compare(url.title()) != 0) ||
593       (node.hidden() != url.hidden())) {
594     // Use the values from the most recent visit.
595     if (node_last_visit >= url.last_visit()) {
596       new_url->set_title(node_title);
597       new_url->set_hidden(node.hidden());
598       different |= DIFF_LOCAL_ROW_CHANGED;
599     } else {
600       new_url->set_title(url.title());
601       new_url->set_hidden(url.hidden());
602       different |= DIFF_UPDATE_NODE;
603     }
604   } else {
605     // No difference.
606     new_url->set_title(url.title());
607     new_url->set_hidden(url.hidden());
608   }
609 
610   size_t node_num_visits = node.visits_size();
611   size_t history_num_visits = visits->size();
612   size_t node_visit_index = 0;
613   size_t history_visit_index = 0;
614   base::Time earliest_history_time = (*visits)[0].visit_time;
615   // Walk through the two sets of visits and figure out if any new visits were
616   // added on either side.
617   while (node_visit_index < node_num_visits ||
618          history_visit_index < history_num_visits) {
619     // Time objects are initialized to "earliest possible time".
620     base::Time node_time, history_time;
621     if (node_visit_index < node_num_visits)
622       node_time = base::Time::FromInternalValue(node.visits(node_visit_index));
623     if (history_visit_index < history_num_visits)
624       history_time = (*visits)[history_visit_index].visit_time;
625     if (node_visit_index >= node_num_visits ||
626         (history_visit_index < history_num_visits &&
627          node_time > history_time)) {
628       // We found a visit in the history DB that doesn't exist in the sync DB,
629       // so mark the node as modified so the caller will update the sync node.
630       different |= DIFF_UPDATE_NODE;
631       ++history_visit_index;
632     } else if (history_visit_index >= history_num_visits ||
633                node_time < history_time) {
634       // Found a visit in the sync node that doesn't exist in the history DB, so
635       // add it to our list of new visits and set the appropriate flag so the
636       // caller will update the history DB.
637       // If the node visit is older than any existing visit in the history DB,
638       // don't re-add it - this keeps us from resurrecting visits that were
639       // aged out locally.
640       if (node_time > earliest_history_time) {
641         different |= DIFF_LOCAL_VISITS_ADDED;
642         new_visits->push_back(history::VisitInfo(
643             node_time,
644             content::PageTransitionFromInt(
645                 node.visit_transitions(node_visit_index))));
646       }
647       // This visit is added to visits below.
648       ++node_visit_index;
649     } else {
650       // Same (already synced) entry found in both DBs - no need to do anything.
651       ++node_visit_index;
652       ++history_visit_index;
653     }
654   }
655 
656   DCHECK(CheckVisitOrdering(*visits));
657   if (different & DIFF_LOCAL_VISITS_ADDED) {
658     // Insert new visits into the apropriate place in the visits vector.
659     history::VisitVector::iterator visit_ix = visits->begin();
660     for (std::vector<history::VisitInfo>::iterator new_visit =
661              new_visits->begin();
662          new_visit != new_visits->end(); ++new_visit) {
663       while (visit_ix != visits->end() &&
664              new_visit->first > visit_ix->visit_time) {
665         ++visit_ix;
666       }
667       visit_ix = visits->insert(visit_ix,
668                                 history::VisitRow(url.id(), new_visit->first,
669                                                   0, new_visit->second, 0));
670       ++visit_ix;
671     }
672   }
673   DCHECK(CheckVisitOrdering(*visits));
674 
675   new_url->set_last_visit(visits->back().visit_time);
676   return different;
677 }
678 
679 // static
WriteToSyncNode(const history::URLRow & url,const history::VisitVector & visits,syncer::WriteNode * node)680 void TypedUrlModelAssociator::WriteToSyncNode(
681     const history::URLRow& url,
682     const history::VisitVector& visits,
683     syncer::WriteNode* node) {
684   sync_pb::TypedUrlSpecifics typed_url;
685   WriteToTypedUrlSpecifics(url, visits, &typed_url);
686   node->SetTypedUrlSpecifics(typed_url);
687 }
688 
WriteToTypedUrlSpecifics(const history::URLRow & url,const history::VisitVector & visits,sync_pb::TypedUrlSpecifics * typed_url)689 void TypedUrlModelAssociator::WriteToTypedUrlSpecifics(
690     const history::URLRow& url,
691     const history::VisitVector& visits,
692     sync_pb::TypedUrlSpecifics* typed_url) {
693 
694   DCHECK(!url.last_visit().is_null());
695   DCHECK(!visits.empty());
696   DCHECK_EQ(url.last_visit().ToInternalValue(),
697             visits.back().visit_time.ToInternalValue());
698 
699   typed_url->set_url(url.url().spec());
700   typed_url->set_title(base::UTF16ToUTF8(url.title()));
701   typed_url->set_hidden(url.hidden());
702 
703   DCHECK(CheckVisitOrdering(visits));
704 
705   bool only_typed = false;
706   int skip_count = 0;
707 
708   if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
709     int typed_count = 0;
710     int total = 0;
711     // Walk the passed-in visit vector and count the # of typed visits.
712     for (history::VisitVector::const_iterator visit = visits.begin();
713          visit != visits.end(); ++visit) {
714       content::PageTransition transition = content::PageTransitionFromInt(
715           visit->transition & content::PAGE_TRANSITION_CORE_MASK);
716       // We ignore reload visits.
717       if (transition == content::PAGE_TRANSITION_RELOAD)
718         continue;
719       ++total;
720       if (transition == content::PAGE_TRANSITION_TYPED)
721         ++typed_count;
722     }
723     // We should have at least one typed visit. This can sometimes happen if
724     // the history DB has an inaccurate count for some reason (there's been
725     // bugs in the history code in the past which has left users in the wild
726     // with incorrect counts - http://crbug.com/84258).
727     DCHECK(typed_count > 0);
728 
729     if (typed_count > kMaxTypedUrlVisits) {
730       only_typed = true;
731       skip_count = typed_count - kMaxTypedUrlVisits;
732     } else if (total > kMaxTypedUrlVisits) {
733       skip_count = total - kMaxTypedUrlVisits;
734     }
735   }
736 
737 
738   for (history::VisitVector::const_iterator visit = visits.begin();
739        visit != visits.end(); ++visit) {
740     content::PageTransition transition = content::PageTransitionFromInt(
741         visit->transition & content::PAGE_TRANSITION_CORE_MASK);
742     // Skip reload visits.
743     if (transition == content::PAGE_TRANSITION_RELOAD)
744       continue;
745 
746     // If we only have room for typed visits, then only add typed visits.
747     if (only_typed && transition != content::PAGE_TRANSITION_TYPED)
748       continue;
749 
750     if (skip_count > 0) {
751       // We have too many entries to fit, so we need to skip the oldest ones.
752       // Only skip typed URLs if there are too many typed URLs to fit.
753       if (only_typed || transition != content::PAGE_TRANSITION_TYPED) {
754         --skip_count;
755         continue;
756       }
757     }
758     typed_url->add_visits(visit->visit_time.ToInternalValue());
759     typed_url->add_visit_transitions(visit->transition);
760   }
761   DCHECK_EQ(skip_count, 0);
762 
763   if (typed_url->visits_size() == 0) {
764     // If we get here, it's because we don't actually have any TYPED visits
765     // even though the visit's typed_count > 0 (corrupted typed_count). So
766     // let's go ahead and add a RELOAD visit at the most recent visit since
767     // it's not legal to have an empty visit array (yet another workaround
768     // for http://crbug.com/84258).
769     typed_url->add_visits(url.last_visit().ToInternalValue());
770     typed_url->add_visit_transitions(content::PAGE_TRANSITION_RELOAD);
771   }
772   CHECK_GT(typed_url->visits_size(), 0);
773   CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
774   CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
775 }
776 
777 // static
DiffVisits(const history::VisitVector & old_visits,const sync_pb::TypedUrlSpecifics & new_url,std::vector<history::VisitInfo> * new_visits,history::VisitVector * removed_visits)778 void TypedUrlModelAssociator::DiffVisits(
779     const history::VisitVector& old_visits,
780     const sync_pb::TypedUrlSpecifics& new_url,
781     std::vector<history::VisitInfo>* new_visits,
782     history::VisitVector* removed_visits) {
783   DCHECK(new_visits);
784   size_t old_visit_count = old_visits.size();
785   size_t new_visit_count = new_url.visits_size();
786   size_t old_index = 0;
787   size_t new_index = 0;
788   while (old_index < old_visit_count && new_index < new_visit_count) {
789     base::Time new_visit_time =
790         base::Time::FromInternalValue(new_url.visits(new_index));
791     if (old_visits[old_index].visit_time < new_visit_time) {
792       if (new_index > 0 && removed_visits) {
793         // If there are visits missing from the start of the node, that
794         // means that they were probably clipped off due to our code that
795         // limits the size of the sync nodes - don't delete them from our
796         // local history.
797         removed_visits->push_back(old_visits[old_index]);
798       }
799       ++old_index;
800     } else if (old_visits[old_index].visit_time > new_visit_time) {
801       new_visits->push_back(history::VisitInfo(
802           new_visit_time,
803           content::PageTransitionFromInt(
804               new_url.visit_transitions(new_index))));
805       ++new_index;
806     } else {
807       ++old_index;
808       ++new_index;
809     }
810   }
811 
812   if (removed_visits) {
813     for ( ; old_index < old_visit_count; ++old_index) {
814       removed_visits->push_back(old_visits[old_index]);
815     }
816   }
817 
818   for ( ; new_index < new_visit_count; ++new_index) {
819     new_visits->push_back(history::VisitInfo(
820         base::Time::FromInternalValue(new_url.visits(new_index)),
821         content::PageTransitionFromInt(new_url.visit_transitions(new_index))));
822   }
823 }
824 
825 
826 // static
UpdateURLRowFromTypedUrlSpecifics(const sync_pb::TypedUrlSpecifics & typed_url,history::URLRow * new_url)827 void TypedUrlModelAssociator::UpdateURLRowFromTypedUrlSpecifics(
828     const sync_pb::TypedUrlSpecifics& typed_url, history::URLRow* new_url) {
829   DCHECK_GT(typed_url.visits_size(), 0);
830   CHECK_EQ(typed_url.visit_transitions_size(), typed_url.visits_size());
831   new_url->set_title(base::UTF8ToUTF16(typed_url.title()));
832   new_url->set_hidden(typed_url.hidden());
833   // Only provide the initial value for the last_visit field - after that, let
834   // the history code update the last_visit field on its own.
835   if (new_url->last_visit().is_null()) {
836     new_url->set_last_visit(base::Time::FromInternalValue(
837         typed_url.visits(typed_url.visits_size() - 1)));
838   }
839 }
840 
CryptoReadyIfNecessary()841 bool TypedUrlModelAssociator::CryptoReadyIfNecessary() {
842   // We only access the cryptographer while holding a transaction.
843   syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
844   const syncer::ModelTypeSet encrypted_types = trans.GetEncryptedTypes();
845   return !encrypted_types.Has(syncer::TYPED_URLS) ||
846          sync_service_->IsCryptographerReady(&trans);
847 }
848 
849 }  // namespace browser_sync
850