1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/sync/glue/typed_url_model_associator.h"
6
7 #include <algorithm>
8 #include <set>
9
10 #include "base/location.h"
11 #include "base/logging.h"
12 #include "base/metrics/histogram.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "chrome/browser/history/history_backend.h"
15 #include "chrome/browser/sync/profile_sync_service.h"
16 #include "content/public/browser/browser_thread.h"
17 #include "net/base/net_util.h"
18 #include "sync/api/sync_error.h"
19 #include "sync/internal_api/public/read_node.h"
20 #include "sync/internal_api/public/read_transaction.h"
21 #include "sync/internal_api/public/write_node.h"
22 #include "sync/internal_api/public/write_transaction.h"
23 #include "sync/protocol/typed_url_specifics.pb.h"
24
25 using content::BrowserThread;
26
27 namespace browser_sync {
28
// Upper bound on the number of visits written into a single typed URL sync
// node. The server backend cannot handle arbitrarily large nodes, so the
// visit array is capped at this many entries.
static const int kMaxTypedUrlVisits = 100;

// Upper bound on the number of visits fetched from the history DB for one
// typed URL. The DB itself imposes no limit, and fetching everything has led
// to out-of-memory crashes (http://crbug.com/89793). This is intentionally
// larger than kMaxTypedUrlVisits: some of the fetched visits may be RELOAD
// visits, which are stripped before the node is written.
static const int kMaxVisitsToFetch = 1000;
39
CheckVisitOrdering(const history::VisitVector & visits)40 static bool CheckVisitOrdering(const history::VisitVector& visits) {
41 int64 previous_visit_time = 0;
42 for (history::VisitVector::const_iterator visit = visits.begin();
43 visit != visits.end(); ++visit) {
44 if (visit != visits.begin()) {
45 // We allow duplicate visits here - they shouldn't really be allowed, but
46 // they still seem to show up sometimes and we haven't figured out the
47 // source, so we just log an error instead of failing an assertion.
48 // (http://crbug.com/91473).
49 if (previous_visit_time == visit->visit_time.ToInternalValue())
50 DVLOG(1) << "Duplicate visit time encountered";
51 else if (previous_visit_time > visit->visit_time.ToInternalValue())
52 return false;
53 }
54
55 previous_visit_time = visit->visit_time.ToInternalValue();
56 }
57 return true;
58 }
59
TypedUrlModelAssociator(ProfileSyncService * sync_service,history::HistoryBackend * history_backend,DataTypeErrorHandler * error_handler)60 TypedUrlModelAssociator::TypedUrlModelAssociator(
61 ProfileSyncService* sync_service,
62 history::HistoryBackend* history_backend,
63 DataTypeErrorHandler* error_handler)
64 : sync_service_(sync_service),
65 history_backend_(history_backend),
66 expected_loop_(base::MessageLoop::current()),
67 abort_requested_(false),
68 error_handler_(error_handler),
69 num_db_accesses_(0),
70 num_db_errors_(0) {
71 DCHECK(sync_service_);
72 // history_backend_ may be null for unit tests (since it's not mockable).
73 DCHECK(!BrowserThread::CurrentlyOn(BrowserThread::UI));
74 }
75
~TypedUrlModelAssociator()76 TypedUrlModelAssociator::~TypedUrlModelAssociator() {}
77
78
FixupURLAndGetVisits(history::URLRow * url,history::VisitVector * visits)79 bool TypedUrlModelAssociator::FixupURLAndGetVisits(
80 history::URLRow* url,
81 history::VisitVector* visits) {
82 ++num_db_accesses_;
83 CHECK(history_backend_);
84 if (!history_backend_->GetMostRecentVisitsForURL(
85 url->id(), kMaxVisitsToFetch, visits)) {
86 ++num_db_errors_;
87 return false;
88 }
89
90 // Sometimes (due to a bug elsewhere in the history or sync code, or due to
91 // a crash between adding a URL to the history database and updating the
92 // visit DB) the visit vector for a URL can be empty. If this happens, just
93 // create a new visit whose timestamp is the same as the last_visit time.
94 // This is a workaround for http://crbug.com/84258.
95 if (visits->empty()) {
96 DVLOG(1) << "Found empty visits for URL: " << url->url();
97
98 if (url->last_visit().is_null()) {
99 // If modified URL is bookmarked, history backend treats it as modified
100 // even if all its visits are deleted. Return false to stop further
101 // processing because sync expects valid visit time for modified entry.
102 return false;
103 }
104
105 history::VisitRow visit(
106 url->id(), url->last_visit(), 0, content::PAGE_TRANSITION_TYPED, 0);
107 visits->push_back(visit);
108 }
109
110 // GetMostRecentVisitsForURL() returns the data in the opposite order that
111 // we need it, so reverse it.
112 std::reverse(visits->begin(), visits->end());
113
114 // Sometimes, the last_visit field in the URL doesn't match the timestamp of
115 // the last visit in our visit array (they come from different tables, so
116 // crashes/bugs can cause them to mismatch), so just set it here.
117 url->set_last_visit(visits->back().visit_time);
118 DCHECK(CheckVisitOrdering(*visits));
119 return true;
120 }
121
ShouldIgnoreUrl(const GURL & url)122 bool TypedUrlModelAssociator::ShouldIgnoreUrl(const GURL& url) {
123 // Ignore empty URLs. Not sure how this can happen (maybe import from other
124 // busted browsers, or misuse of the history API, or just plain bugs) but we
125 // can't deal with them.
126 if (url.spec().empty())
127 return true;
128
129 // Ignore local file URLs.
130 if (url.SchemeIsFile())
131 return true;
132
133 // Ignore localhost URLs.
134 if (net::IsLocalhost(url.host()))
135 return true;
136
137 return false;
138 }
139
ShouldIgnoreVisits(const history::VisitVector & visits)140 bool TypedUrlModelAssociator::ShouldIgnoreVisits(
141 const history::VisitVector& visits) {
142 // We ignore URLs that were imported, but have never been visited by
143 // chromium.
144 static const int kLastImportedSource = history::SOURCE_EXTENSION;
145 history::VisitSourceMap map;
146 if (!history_backend_->GetVisitsSource(visits, &map))
147 return false; // If we can't read the visit, assume it's not imported.
148
149 // Walk the list of visits and look for a non-imported item.
150 for (history::VisitVector::const_iterator it = visits.begin();
151 it != visits.end(); ++it) {
152 if (map.count(it->visit_id) == 0 ||
153 map[it->visit_id] <= kLastImportedSource) {
154 return false;
155 }
156 }
157 // We only saw imported visits, so tell the caller to ignore them.
158 return true;
159 }
160
AssociateModels(syncer::SyncMergeResult * local_merge_result,syncer::SyncMergeResult * syncer_merge_result)161 syncer::SyncError TypedUrlModelAssociator::AssociateModels(
162 syncer::SyncMergeResult* local_merge_result,
163 syncer::SyncMergeResult* syncer_merge_result) {
164 ClearErrorStats();
165 syncer::SyncError error = DoAssociateModels();
166 UMA_HISTOGRAM_PERCENTAGE("Sync.TypedUrlModelAssociationErrors",
167 GetErrorPercentage());
168 ClearErrorStats();
169 return error;
170 }
171
ClearErrorStats()172 void TypedUrlModelAssociator::ClearErrorStats() {
173 num_db_accesses_ = 0;
174 num_db_errors_ = 0;
175 }
176
GetErrorPercentage() const177 int TypedUrlModelAssociator::GetErrorPercentage() const {
178 return num_db_accesses_ ? (100 * num_db_errors_ / num_db_accesses_) : 0;
179 }
180
DoAssociateModels()181 syncer::SyncError TypedUrlModelAssociator::DoAssociateModels() {
182 DVLOG(1) << "Associating TypedUrl Models";
183 DCHECK(expected_loop_ == base::MessageLoop::current());
184
185 history::URLRows typed_urls;
186 ++num_db_accesses_;
187 bool query_succeeded =
188 history_backend_ && history_backend_->GetAllTypedURLs(&typed_urls);
189
190 history::URLRows new_urls;
191 TypedUrlVisitVector new_visits;
192 TypedUrlUpdateVector updated_urls;
193 {
194 base::AutoLock au(abort_lock_);
195 if (abort_requested_) {
196 return syncer::SyncError(FROM_HERE,
197 syncer::SyncError::DATATYPE_ERROR,
198 "Association was aborted.",
199 model_type());
200 }
201
202 // Must lock and check first to make sure |error_handler_| is valid.
203 if (!query_succeeded) {
204 ++num_db_errors_;
205 return error_handler_->CreateAndUploadError(
206 FROM_HERE,
207 "Could not get the typed_url entries.",
208 model_type());
209 }
210
211 // Get all the visits.
212 std::map<history::URLID, history::VisitVector> visit_vectors;
213 for (history::URLRows::iterator ix = typed_urls.begin();
214 ix != typed_urls.end();) {
215 DCHECK_EQ(0U, visit_vectors.count(ix->id()));
216 if (!FixupURLAndGetVisits(&(*ix), &(visit_vectors[ix->id()])) ||
217 ShouldIgnoreUrl(ix->url()) ||
218 ShouldIgnoreVisits(visit_vectors[ix->id()])) {
219 // Ignore this URL if we couldn't load the visits or if there's some
220 // other problem with it (it was empty, or imported and never visited).
221 ix = typed_urls.erase(ix);
222 } else {
223 ++ix;
224 }
225 }
226
227 syncer::WriteTransaction trans(FROM_HERE, sync_service_->GetUserShare());
228 syncer::ReadNode typed_url_root(&trans);
229 if (typed_url_root.InitTypeRoot(syncer::TYPED_URLS) !=
230 syncer::BaseNode::INIT_OK) {
231 return error_handler_->CreateAndUploadError(
232 FROM_HERE,
233 "Server did not create the top-level typed_url node. We "
234 "might be running against an out-of-date server.",
235 model_type());
236 }
237
238 std::set<std::string> current_urls;
239 for (history::URLRows::iterator ix = typed_urls.begin();
240 ix != typed_urls.end(); ++ix) {
241 std::string tag = ix->url().spec();
242 // Empty URLs should be filtered out by ShouldIgnoreUrl() previously.
243 DCHECK(!tag.empty());
244 history::VisitVector& visits = visit_vectors[ix->id()];
245
246 syncer::ReadNode node(&trans);
247 if (node.InitByClientTagLookup(syncer::TYPED_URLS, tag) ==
248 syncer::BaseNode::INIT_OK) {
249 // Same URL exists in sync data and in history data - compare the
250 // entries to see if there's any difference.
251 sync_pb::TypedUrlSpecifics typed_url(
252 FilterExpiredVisits(node.GetTypedUrlSpecifics()));
253 DCHECK_EQ(tag, typed_url.url());
254
255 // Initialize fields in |new_url| to the same values as the fields in
256 // the existing URLRow in the history DB. This is needed because we
257 // overwrite the existing value below in WriteToHistoryBackend(), but
258 // some of the values in that structure are not synced (like
259 // typed_count).
260 history::URLRow new_url(*ix);
261
262 std::vector<history::VisitInfo> added_visits;
263 MergeResult difference =
264 MergeUrls(typed_url, *ix, &visits, &new_url, &added_visits);
265 if (difference & DIFF_UPDATE_NODE) {
266 syncer::WriteNode write_node(&trans);
267 if (write_node.InitByClientTagLookup(syncer::TYPED_URLS, tag) !=
268 syncer::BaseNode::INIT_OK) {
269 return error_handler_->CreateAndUploadError(
270 FROM_HERE,
271 "Failed to edit typed_url sync node.",
272 model_type());
273 }
274 // We don't want to resurrect old visits that have been aged out by
275 // other clients, so remove all visits that are older than the
276 // earliest existing visit in the sync node.
277 if (typed_url.visits_size() > 0) {
278 base::Time earliest_visit =
279 base::Time::FromInternalValue(typed_url.visits(0));
280 for (history::VisitVector::iterator it = visits.begin();
281 it != visits.end() && it->visit_time < earliest_visit; ) {
282 it = visits.erase(it);
283 }
284 // Should never be possible to delete all the items, since the
285 // visit vector contains all the items in typed_url.visits.
286 DCHECK(visits.size() > 0);
287 }
288 DCHECK_EQ(new_url.last_visit().ToInternalValue(),
289 visits.back().visit_time.ToInternalValue());
290 WriteToSyncNode(new_url, visits, &write_node);
291 }
292 if (difference & DIFF_LOCAL_ROW_CHANGED) {
293 updated_urls.push_back(
294 std::pair<history::URLID, history::URLRow>(ix->id(), new_url));
295 }
296 if (difference & DIFF_LOCAL_VISITS_ADDED) {
297 new_visits.push_back(
298 std::pair<GURL, std::vector<history::VisitInfo> >(ix->url(),
299 added_visits));
300 }
301 } else {
302 // Sync has never seen this URL before.
303 syncer::WriteNode node(&trans);
304 syncer::WriteNode::InitUniqueByCreationResult result =
305 node.InitUniqueByCreation(syncer::TYPED_URLS,
306 typed_url_root, tag);
307 if (result != syncer::WriteNode::INIT_SUCCESS) {
308 return error_handler_->CreateAndUploadError(
309 FROM_HERE,
310 "Failed to create typed_url sync node: " + tag,
311 model_type());
312 }
313
314 node.SetTitle(tag);
315 WriteToSyncNode(*ix, visits, &node);
316 }
317
318 current_urls.insert(tag);
319 }
320
321 // Now walk the sync nodes and detect any URLs that exist there, but not in
322 // the history DB, so we can add them to our local history DB.
323 std::vector<int64> obsolete_nodes;
324 int64 sync_child_id = typed_url_root.GetFirstChildId();
325 while (sync_child_id != syncer::kInvalidId) {
326 syncer::ReadNode sync_child_node(&trans);
327 if (sync_child_node.InitByIdLookup(sync_child_id) !=
328 syncer::BaseNode::INIT_OK) {
329 return error_handler_->CreateAndUploadError(
330 FROM_HERE,
331 "Failed to fetch child node.",
332 model_type());
333 }
334 const sync_pb::TypedUrlSpecifics& typed_url(
335 sync_child_node.GetTypedUrlSpecifics());
336
337 sync_child_id = sync_child_node.GetSuccessorId();
338
339 // Ignore old sync nodes that don't have any transition data stored with
340 // them, or transition data that does not match the visit data (will be
341 // deleted below).
342 if (typed_url.visit_transitions_size() == 0 ||
343 typed_url.visit_transitions_size() != typed_url.visits_size()) {
344 // Generate a debug assertion to help track down http://crbug.com/91473,
345 // even though we gracefully handle this case by throwing away this
346 // node.
347 DCHECK_EQ(typed_url.visits_size(), typed_url.visit_transitions_size());
348 DVLOG(1) << "Deleting obsolete sync node with no visit "
349 << "transition info.";
350 obsolete_nodes.push_back(sync_child_node.GetId());
351 continue;
352 }
353
354 if (typed_url.url().empty()) {
355 DVLOG(1) << "Ignoring empty URL in sync DB";
356 continue;
357 }
358
359 // Now, get rid of the expired visits, and if there are no un-expired
360 // visits left, just ignore this node.
361 sync_pb::TypedUrlSpecifics filtered_url = FilterExpiredVisits(typed_url);
362 if (filtered_url.visits_size() == 0) {
363 DVLOG(1) << "Ignoring expired URL in sync DB: " << filtered_url.url();
364 continue;
365 }
366
367 if (current_urls.find(filtered_url.url()) == current_urls.end()) {
368 // Update the local DB from the sync DB. Since we are doing our
369 // initial model association, we don't want to remove any of the
370 // existing visits (pass NULL as |visits_to_remove|).
371 UpdateFromSyncDB(filtered_url,
372 &new_visits,
373 NULL,
374 &updated_urls,
375 &new_urls);
376 }
377 }
378
379 // If we encountered any obsolete nodes, remove them so they don't hang
380 // around and confuse people looking at the sync node browser.
381 if (!obsolete_nodes.empty()) {
382 for (std::vector<int64>::const_iterator it = obsolete_nodes.begin();
383 it != obsolete_nodes.end();
384 ++it) {
385 syncer::WriteNode sync_node(&trans);
386 if (sync_node.InitByIdLookup(*it) != syncer::BaseNode::INIT_OK) {
387 return error_handler_->CreateAndUploadError(
388 FROM_HERE,
389 "Failed to fetch obsolete node.",
390 model_type());
391 }
392 sync_node.Tombstone();
393 }
394 }
395 }
396
397 // Since we're on the history thread, we don't have to worry about updating
398 // the history database after closing the write transaction, since
399 // this is the only thread that writes to the database. We also don't have
400 // to worry about the sync model getting out of sync, because changes are
401 // propagated to the ChangeProcessor on this thread.
402 WriteToHistoryBackend(&new_urls, &updated_urls, &new_visits, NULL);
403 return syncer::SyncError();
404 }
405
UpdateFromSyncDB(const sync_pb::TypedUrlSpecifics & typed_url,TypedUrlVisitVector * visits_to_add,history::VisitVector * visits_to_remove,TypedUrlUpdateVector * updated_urls,history::URLRows * new_urls)406 void TypedUrlModelAssociator::UpdateFromSyncDB(
407 const sync_pb::TypedUrlSpecifics& typed_url,
408 TypedUrlVisitVector* visits_to_add,
409 history::VisitVector* visits_to_remove,
410 TypedUrlUpdateVector* updated_urls,
411 history::URLRows* new_urls) {
412 history::URLRow new_url(GURL(typed_url.url()));
413 history::VisitVector existing_visits;
414 bool existing_url = history_backend_->GetURL(new_url.url(), &new_url);
415 if (existing_url) {
416 // This URL already exists locally - fetch the visits so we can
417 // merge them below.
418 if (!FixupURLAndGetVisits(&new_url, &existing_visits)) {
419 // Couldn't load the visits for this URL due to some kind of DB error.
420 // Don't bother writing this URL to the history DB (if we ignore the
421 // error and continue, we might end up duplicating existing visits).
422 DLOG(ERROR) << "Could not load visits for url: " << new_url.url();
423 return;
424 }
425 }
426 visits_to_add->push_back(std::pair<GURL, std::vector<history::VisitInfo> >(
427 new_url.url(), std::vector<history::VisitInfo>()));
428
429 // Update the URL with information from the typed URL.
430 UpdateURLRowFromTypedUrlSpecifics(typed_url, &new_url);
431
432 // Figure out which visits we need to add.
433 DiffVisits(existing_visits, typed_url, &visits_to_add->back().second,
434 visits_to_remove);
435
436 if (existing_url) {
437 updated_urls->push_back(
438 std::pair<history::URLID, history::URLRow>(new_url.id(), new_url));
439 } else {
440 new_urls->push_back(new_url);
441 }
442 }
443
// Returns a copy of |source| whose visits / visit_transitions arrays contain
// only the entries whose visit time the history backend does not consider
// expired. All other fields are copied unchanged.
//
// The two arrays are read in lock step, so only indices present in BOTH
// arrays are considered. Nodes whose transition array is shorter than the
// visit array do exist (old-format nodes, http://crbug.com/91473), and
// indexing a repeated field past its size is invalid. Visits without a
// matching transition are therefore dropped rather than read out of range.
// The returned specifics always has equally sized arrays.
sync_pb::TypedUrlSpecifics TypedUrlModelAssociator::FilterExpiredVisits(
    const sync_pb::TypedUrlSpecifics& source) {
  // Make a copy of the source, then regenerate the visits.
  sync_pb::TypedUrlSpecifics specifics(source);
  specifics.clear_visits();
  specifics.clear_visit_transitions();

  const int paired_count =
      std::min(source.visits_size(), source.visit_transitions_size());
  for (int i = 0; i < paired_count; ++i) {
    const base::Time time = base::Time::FromInternalValue(source.visits(i));
    if (history_backend_->IsExpiredVisitTime(time))
      continue;
    specifics.add_visits(source.visits(i));
    specifics.add_visit_transitions(source.visit_transitions(i));
  }
  DCHECK(specifics.visits_size() == specifics.visit_transitions_size());
  return specifics;
}
460
DeleteAllNodes(syncer::WriteTransaction * trans)461 bool TypedUrlModelAssociator::DeleteAllNodes(
462 syncer::WriteTransaction* trans) {
463 DCHECK(expected_loop_ == base::MessageLoop::current());
464
465 // Just walk through all our child nodes and delete them.
466 syncer::ReadNode typed_url_root(trans);
467 if (typed_url_root.InitTypeRoot(syncer::TYPED_URLS) !=
468 syncer::BaseNode::INIT_OK) {
469 LOG(ERROR) << "Could not lookup root node";
470 return false;
471 }
472 int64 sync_child_id = typed_url_root.GetFirstChildId();
473 while (sync_child_id != syncer::kInvalidId) {
474 syncer::WriteNode sync_child_node(trans);
475 if (sync_child_node.InitByIdLookup(sync_child_id) !=
476 syncer::BaseNode::INIT_OK) {
477 LOG(ERROR) << "Typed url node lookup failed.";
478 return false;
479 }
480 sync_child_id = sync_child_node.GetSuccessorId();
481 sync_child_node.Tombstone();
482 }
483 return true;
484 }
485
DisassociateModels()486 syncer::SyncError TypedUrlModelAssociator::DisassociateModels() {
487 return syncer::SyncError();
488 }
489
AbortAssociation()490 void TypedUrlModelAssociator::AbortAssociation() {
491 base::AutoLock lock(abort_lock_);
492 abort_requested_ = true;
493 }
494
SyncModelHasUserCreatedNodes(bool * has_nodes)495 bool TypedUrlModelAssociator::SyncModelHasUserCreatedNodes(bool* has_nodes) {
496 DCHECK(has_nodes);
497 *has_nodes = false;
498 syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
499 syncer::ReadNode sync_node(&trans);
500 if (sync_node.InitTypeRoot(syncer::TYPED_URLS) != syncer::BaseNode::INIT_OK) {
501 LOG(ERROR) << "Server did not create the top-level typed_url node. We "
502 << "might be running against an out-of-date server.";
503 return false;
504 }
505
506 // The sync model has user created nodes if the typed_url folder has any
507 // children.
508 *has_nodes = sync_node.HasChildren();
509 return true;
510 }
511
WriteToHistoryBackend(const history::URLRows * new_urls,const TypedUrlUpdateVector * updated_urls,const TypedUrlVisitVector * new_visits,const history::VisitVector * deleted_visits)512 void TypedUrlModelAssociator::WriteToHistoryBackend(
513 const history::URLRows* new_urls,
514 const TypedUrlUpdateVector* updated_urls,
515 const TypedUrlVisitVector* new_visits,
516 const history::VisitVector* deleted_visits) {
517 if (new_urls) {
518 history_backend_->AddPagesWithDetails(*new_urls, history::SOURCE_SYNCED);
519 }
520 if (updated_urls) {
521 for (TypedUrlUpdateVector::const_iterator url = updated_urls->begin();
522 url != updated_urls->end(); ++url) {
523 // This is an existing entry in the URL database. We don't verify the
524 // visit_count or typed_count values here, because either one (or both)
525 // could be zero in the case of bookmarks, or in the case of a URL
526 // transitioning from non-typed to typed as a result of this sync.
527 ++num_db_accesses_;
528 if (!history_backend_->UpdateURL(url->first, url->second)) {
529 // In the field we sometimes run into errors on specific URLs. It's OK
530 // to just continue on (we can try writing again on the next model
531 // association).
532 ++num_db_errors_;
533 DLOG(ERROR) << "Could not update page: " << url->second.url().spec();
534 }
535 }
536 }
537 if (new_visits) {
538 for (TypedUrlVisitVector::const_iterator visits = new_visits->begin();
539 visits != new_visits->end(); ++visits) {
540 // If there are no visits to add, just skip this.
541 if (visits->second.empty())
542 continue;
543 ++num_db_accesses_;
544 if (!history_backend_->AddVisits(visits->first, visits->second,
545 history::SOURCE_SYNCED)) {
546 ++num_db_errors_;
547 DLOG(ERROR) << "Could not add visits.";
548 }
549 }
550 }
551 if (deleted_visits) {
552 ++num_db_accesses_;
553 if (!history_backend_->RemoveVisits(*deleted_visits)) {
554 ++num_db_errors_;
555 DLOG(ERROR) << "Could not remove visits.";
556 // This is bad news, since it means we may end up resurrecting history
557 // entries on the next reload. It's unavoidable so we'll just keep on
558 // syncing.
559 }
560 }
561 }
562
563 // static
MergeUrls(const sync_pb::TypedUrlSpecifics & node,const history::URLRow & url,history::VisitVector * visits,history::URLRow * new_url,std::vector<history::VisitInfo> * new_visits)564 TypedUrlModelAssociator::MergeResult TypedUrlModelAssociator::MergeUrls(
565 const sync_pb::TypedUrlSpecifics& node,
566 const history::URLRow& url,
567 history::VisitVector* visits,
568 history::URLRow* new_url,
569 std::vector<history::VisitInfo>* new_visits) {
570 DCHECK(new_url);
571 DCHECK(!node.url().compare(url.url().spec()));
572 DCHECK(!node.url().compare(new_url->url().spec()));
573 DCHECK(visits->size());
574 CHECK_EQ(node.visits_size(), node.visit_transitions_size());
575
576 // If we have an old-format node (before we added the visits and
577 // visit_transitions arrays to the protobuf) or else the node only contained
578 // expired visits, so just overwrite it with our local history data.
579 if (node.visits_size() == 0)
580 return DIFF_UPDATE_NODE;
581
582 // Convert these values only once.
583 base::string16 node_title(base::UTF8ToUTF16(node.title()));
584 base::Time node_last_visit = base::Time::FromInternalValue(
585 node.visits(node.visits_size() - 1));
586
587 // This is a bitfield representing what we'll need to update with the output
588 // value.
589 MergeResult different = DIFF_NONE;
590
591 // Check if the non-incremented values changed.
592 if ((node_title.compare(url.title()) != 0) ||
593 (node.hidden() != url.hidden())) {
594 // Use the values from the most recent visit.
595 if (node_last_visit >= url.last_visit()) {
596 new_url->set_title(node_title);
597 new_url->set_hidden(node.hidden());
598 different |= DIFF_LOCAL_ROW_CHANGED;
599 } else {
600 new_url->set_title(url.title());
601 new_url->set_hidden(url.hidden());
602 different |= DIFF_UPDATE_NODE;
603 }
604 } else {
605 // No difference.
606 new_url->set_title(url.title());
607 new_url->set_hidden(url.hidden());
608 }
609
610 size_t node_num_visits = node.visits_size();
611 size_t history_num_visits = visits->size();
612 size_t node_visit_index = 0;
613 size_t history_visit_index = 0;
614 base::Time earliest_history_time = (*visits)[0].visit_time;
615 // Walk through the two sets of visits and figure out if any new visits were
616 // added on either side.
617 while (node_visit_index < node_num_visits ||
618 history_visit_index < history_num_visits) {
619 // Time objects are initialized to "earliest possible time".
620 base::Time node_time, history_time;
621 if (node_visit_index < node_num_visits)
622 node_time = base::Time::FromInternalValue(node.visits(node_visit_index));
623 if (history_visit_index < history_num_visits)
624 history_time = (*visits)[history_visit_index].visit_time;
625 if (node_visit_index >= node_num_visits ||
626 (history_visit_index < history_num_visits &&
627 node_time > history_time)) {
628 // We found a visit in the history DB that doesn't exist in the sync DB,
629 // so mark the node as modified so the caller will update the sync node.
630 different |= DIFF_UPDATE_NODE;
631 ++history_visit_index;
632 } else if (history_visit_index >= history_num_visits ||
633 node_time < history_time) {
634 // Found a visit in the sync node that doesn't exist in the history DB, so
635 // add it to our list of new visits and set the appropriate flag so the
636 // caller will update the history DB.
637 // If the node visit is older than any existing visit in the history DB,
638 // don't re-add it - this keeps us from resurrecting visits that were
639 // aged out locally.
640 if (node_time > earliest_history_time) {
641 different |= DIFF_LOCAL_VISITS_ADDED;
642 new_visits->push_back(history::VisitInfo(
643 node_time,
644 content::PageTransitionFromInt(
645 node.visit_transitions(node_visit_index))));
646 }
647 // This visit is added to visits below.
648 ++node_visit_index;
649 } else {
650 // Same (already synced) entry found in both DBs - no need to do anything.
651 ++node_visit_index;
652 ++history_visit_index;
653 }
654 }
655
656 DCHECK(CheckVisitOrdering(*visits));
657 if (different & DIFF_LOCAL_VISITS_ADDED) {
658 // Insert new visits into the apropriate place in the visits vector.
659 history::VisitVector::iterator visit_ix = visits->begin();
660 for (std::vector<history::VisitInfo>::iterator new_visit =
661 new_visits->begin();
662 new_visit != new_visits->end(); ++new_visit) {
663 while (visit_ix != visits->end() &&
664 new_visit->first > visit_ix->visit_time) {
665 ++visit_ix;
666 }
667 visit_ix = visits->insert(visit_ix,
668 history::VisitRow(url.id(), new_visit->first,
669 0, new_visit->second, 0));
670 ++visit_ix;
671 }
672 }
673 DCHECK(CheckVisitOrdering(*visits));
674
675 new_url->set_last_visit(visits->back().visit_time);
676 return different;
677 }
678
679 // static
WriteToSyncNode(const history::URLRow & url,const history::VisitVector & visits,syncer::WriteNode * node)680 void TypedUrlModelAssociator::WriteToSyncNode(
681 const history::URLRow& url,
682 const history::VisitVector& visits,
683 syncer::WriteNode* node) {
684 sync_pb::TypedUrlSpecifics typed_url;
685 WriteToTypedUrlSpecifics(url, visits, &typed_url);
686 node->SetTypedUrlSpecifics(typed_url);
687 }
688
WriteToTypedUrlSpecifics(const history::URLRow & url,const history::VisitVector & visits,sync_pb::TypedUrlSpecifics * typed_url)689 void TypedUrlModelAssociator::WriteToTypedUrlSpecifics(
690 const history::URLRow& url,
691 const history::VisitVector& visits,
692 sync_pb::TypedUrlSpecifics* typed_url) {
693
694 DCHECK(!url.last_visit().is_null());
695 DCHECK(!visits.empty());
696 DCHECK_EQ(url.last_visit().ToInternalValue(),
697 visits.back().visit_time.ToInternalValue());
698
699 typed_url->set_url(url.url().spec());
700 typed_url->set_title(base::UTF16ToUTF8(url.title()));
701 typed_url->set_hidden(url.hidden());
702
703 DCHECK(CheckVisitOrdering(visits));
704
705 bool only_typed = false;
706 int skip_count = 0;
707
708 if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
709 int typed_count = 0;
710 int total = 0;
711 // Walk the passed-in visit vector and count the # of typed visits.
712 for (history::VisitVector::const_iterator visit = visits.begin();
713 visit != visits.end(); ++visit) {
714 content::PageTransition transition = content::PageTransitionFromInt(
715 visit->transition & content::PAGE_TRANSITION_CORE_MASK);
716 // We ignore reload visits.
717 if (transition == content::PAGE_TRANSITION_RELOAD)
718 continue;
719 ++total;
720 if (transition == content::PAGE_TRANSITION_TYPED)
721 ++typed_count;
722 }
723 // We should have at least one typed visit. This can sometimes happen if
724 // the history DB has an inaccurate count for some reason (there's been
725 // bugs in the history code in the past which has left users in the wild
726 // with incorrect counts - http://crbug.com/84258).
727 DCHECK(typed_count > 0);
728
729 if (typed_count > kMaxTypedUrlVisits) {
730 only_typed = true;
731 skip_count = typed_count - kMaxTypedUrlVisits;
732 } else if (total > kMaxTypedUrlVisits) {
733 skip_count = total - kMaxTypedUrlVisits;
734 }
735 }
736
737
738 for (history::VisitVector::const_iterator visit = visits.begin();
739 visit != visits.end(); ++visit) {
740 content::PageTransition transition = content::PageTransitionFromInt(
741 visit->transition & content::PAGE_TRANSITION_CORE_MASK);
742 // Skip reload visits.
743 if (transition == content::PAGE_TRANSITION_RELOAD)
744 continue;
745
746 // If we only have room for typed visits, then only add typed visits.
747 if (only_typed && transition != content::PAGE_TRANSITION_TYPED)
748 continue;
749
750 if (skip_count > 0) {
751 // We have too many entries to fit, so we need to skip the oldest ones.
752 // Only skip typed URLs if there are too many typed URLs to fit.
753 if (only_typed || transition != content::PAGE_TRANSITION_TYPED) {
754 --skip_count;
755 continue;
756 }
757 }
758 typed_url->add_visits(visit->visit_time.ToInternalValue());
759 typed_url->add_visit_transitions(visit->transition);
760 }
761 DCHECK_EQ(skip_count, 0);
762
763 if (typed_url->visits_size() == 0) {
764 // If we get here, it's because we don't actually have any TYPED visits
765 // even though the visit's typed_count > 0 (corrupted typed_count). So
766 // let's go ahead and add a RELOAD visit at the most recent visit since
767 // it's not legal to have an empty visit array (yet another workaround
768 // for http://crbug.com/84258).
769 typed_url->add_visits(url.last_visit().ToInternalValue());
770 typed_url->add_visit_transitions(content::PAGE_TRANSITION_RELOAD);
771 }
772 CHECK_GT(typed_url->visits_size(), 0);
773 CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
774 CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
775 }
776
777 // static
DiffVisits(const history::VisitVector & old_visits,const sync_pb::TypedUrlSpecifics & new_url,std::vector<history::VisitInfo> * new_visits,history::VisitVector * removed_visits)778 void TypedUrlModelAssociator::DiffVisits(
779 const history::VisitVector& old_visits,
780 const sync_pb::TypedUrlSpecifics& new_url,
781 std::vector<history::VisitInfo>* new_visits,
782 history::VisitVector* removed_visits) {
783 DCHECK(new_visits);
784 size_t old_visit_count = old_visits.size();
785 size_t new_visit_count = new_url.visits_size();
786 size_t old_index = 0;
787 size_t new_index = 0;
788 while (old_index < old_visit_count && new_index < new_visit_count) {
789 base::Time new_visit_time =
790 base::Time::FromInternalValue(new_url.visits(new_index));
791 if (old_visits[old_index].visit_time < new_visit_time) {
792 if (new_index > 0 && removed_visits) {
793 // If there are visits missing from the start of the node, that
794 // means that they were probably clipped off due to our code that
795 // limits the size of the sync nodes - don't delete them from our
796 // local history.
797 removed_visits->push_back(old_visits[old_index]);
798 }
799 ++old_index;
800 } else if (old_visits[old_index].visit_time > new_visit_time) {
801 new_visits->push_back(history::VisitInfo(
802 new_visit_time,
803 content::PageTransitionFromInt(
804 new_url.visit_transitions(new_index))));
805 ++new_index;
806 } else {
807 ++old_index;
808 ++new_index;
809 }
810 }
811
812 if (removed_visits) {
813 for ( ; old_index < old_visit_count; ++old_index) {
814 removed_visits->push_back(old_visits[old_index]);
815 }
816 }
817
818 for ( ; new_index < new_visit_count; ++new_index) {
819 new_visits->push_back(history::VisitInfo(
820 base::Time::FromInternalValue(new_url.visits(new_index)),
821 content::PageTransitionFromInt(new_url.visit_transitions(new_index))));
822 }
823 }
824
825
826 // static
UpdateURLRowFromTypedUrlSpecifics(const sync_pb::TypedUrlSpecifics & typed_url,history::URLRow * new_url)827 void TypedUrlModelAssociator::UpdateURLRowFromTypedUrlSpecifics(
828 const sync_pb::TypedUrlSpecifics& typed_url, history::URLRow* new_url) {
829 DCHECK_GT(typed_url.visits_size(), 0);
830 CHECK_EQ(typed_url.visit_transitions_size(), typed_url.visits_size());
831 new_url->set_title(base::UTF8ToUTF16(typed_url.title()));
832 new_url->set_hidden(typed_url.hidden());
833 // Only provide the initial value for the last_visit field - after that, let
834 // the history code update the last_visit field on its own.
835 if (new_url->last_visit().is_null()) {
836 new_url->set_last_visit(base::Time::FromInternalValue(
837 typed_url.visits(typed_url.visits_size() - 1)));
838 }
839 }
840
CryptoReadyIfNecessary()841 bool TypedUrlModelAssociator::CryptoReadyIfNecessary() {
842 // We only access the cryptographer while holding a transaction.
843 syncer::ReadTransaction trans(FROM_HERE, sync_service_->GetUserShare());
844 const syncer::ModelTypeSet encrypted_types = trans.GetEncryptedTypes();
845 return !encrypted_types.Has(syncer::TYPED_URLS) ||
846 sync_service_->IsCryptographerReady(&trans);
847 }
848
849 } // namespace browser_sync
850