1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/history/typed_url_syncable_service.h"
6
7 #include "base/auto_reset.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "chrome/browser/history/history_backend.h"
12 #include "net/base/net_util.h"
13 #include "sync/protocol/sync.pb.h"
14 #include "sync/protocol/typed_url_specifics.pb.h"
15
namespace {

// Upper bound on the number of visits written into a single sync node. The
// server backend can't handle arbitrarily large nodes, so the visit array is
// capped at this size.
const int kMaxTypedUrlVisits = 100;

// Upper bound on the number of visits fetched from the history DB for one
// typed URL. The DB itself imposes no limit, and unbounded fetches have led to
// out-of-memory crashes (http://crbug.com/89793). This is deliberately larger
// than kMaxTypedUrlVisits because some of the fetched visits may be RELOAD
// visits, which are stripped before syncing.
const int kMaxVisitsToFetch = 1000;

// URLs whose typed_count is at or above this value have their sync updates
// throttled.
const int kTypedUrlVisitThrottleThreshold = 10;

// Throttling multiple: with a multiple of N only every Nth update is synced
// (i.e. when typed_count % N == 0).
const int kTypedUrlVisitThrottleMultiple = 10;

}  // namespace
38
39 namespace history {
40
// Tag passed to syncer::SyncData::CreateLocalData() for every typed URL change
// built in this file.
const char kTypedUrlTag[] = "google_chrome_typed_urls";
42
CheckVisitOrdering(const VisitVector & visits)43 static bool CheckVisitOrdering(const VisitVector& visits) {
44 int64 previous_visit_time = 0;
45 for (VisitVector::const_iterator visit = visits.begin();
46 visit != visits.end(); ++visit) {
47 if (visit != visits.begin()) {
48 // We allow duplicate visits here - they shouldn't really be allowed, but
49 // they still seem to show up sometimes and we haven't figured out the
50 // source, so we just log an error instead of failing an assertion.
51 // (http://crbug.com/91473).
52 if (previous_visit_time == visit->visit_time.ToInternalValue())
53 DVLOG(1) << "Duplicate visit time encountered";
54 else if (previous_visit_time > visit->visit_time.ToInternalValue())
55 return false;
56 }
57
58 previous_visit_time = visit->visit_time.ToInternalValue();
59 }
60 return true;
61 }
62
TypedUrlSyncableService(HistoryBackend * history_backend)63 TypedUrlSyncableService::TypedUrlSyncableService(
64 HistoryBackend* history_backend)
65 : history_backend_(history_backend),
66 processing_syncer_changes_(false),
67 expected_loop_(base::MessageLoop::current()) {
68 DCHECK(history_backend_);
69 DCHECK(expected_loop_ == base::MessageLoop::current());
70 }
71
// Performs no explicit cleanup; only asserts that destruction happens on the
// message loop that was recorded at construction time.
TypedUrlSyncableService::~TypedUrlSyncableService() {
  DCHECK(expected_loop_ == base::MessageLoop::current());
}
75
// Starts syncing typed URLs: takes ownership of |sync_processor| and
// |error_handler| (both must be non-NULL) and returns a merge result for
// |type|, which must be syncer::TYPED_URLS. Must not be called while a sync
// processor is already held. |initial_sync_data| is not consumed yet: the
// merge itself is still unimplemented (see the TODO below), so the returned
// result is left exactly as constructed.
syncer::SyncMergeResult TypedUrlSyncableService::MergeDataAndStartSyncing(
    syncer::ModelType type,
    const syncer::SyncDataList& initial_sync_data,
    scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
    scoped_ptr<syncer::SyncErrorFactory> error_handler) {
  DCHECK(expected_loop_ == base::MessageLoop::current());
  DCHECK(!sync_processor_.get());
  DCHECK(sync_processor.get());
  DCHECK(error_handler.get());
  DCHECK_EQ(type, syncer::TYPED_URLS);

  syncer::SyncMergeResult merge_result(type);
  // From here on the On*() notification handlers see a non-NULL
  // |sync_processor_| and start forwarding local changes.
  sync_processor_ = sync_processor.Pass();
  sync_error_handler_ = error_handler.Pass();

  // TODO(mgist): Add implementation

  return merge_result;
}
95
// Stops syncing typed URLs by releasing the change processor and the error
// handler. |type| must be syncer::TYPED_URLS. Once |sync_processor_| is reset,
// the On*() notification handlers in this file return early without producing
// any changes.
void TypedUrlSyncableService::StopSyncing(syncer::ModelType type) {
  DCHECK(expected_loop_ == base::MessageLoop::current());
  DCHECK_EQ(type, syncer::TYPED_URLS);

  sync_processor_.reset();
  sync_error_handler_.reset();
}
103
GetAllSyncData(syncer::ModelType type) const104 syncer::SyncDataList TypedUrlSyncableService::GetAllSyncData(
105 syncer::ModelType type) const {
106 DCHECK(expected_loop_ == base::MessageLoop::current());
107 syncer::SyncDataList list;
108
109 // TODO(mgist): Add implementation
110
111 return list;
112 }
113
// Receives |change_list| but does not process it yet: the implementation is
// still missing (see the TODO below), so a DATATYPE_ERROR for
// syncer::TYPED_URLS is returned unconditionally. |from_here| is currently
// unused; the returned error is tagged with this function's own FROM_HERE
// location instead.
syncer::SyncError TypedUrlSyncableService::ProcessSyncChanges(
    const tracked_objects::Location& from_here,
    const syncer::SyncChangeList& change_list) {
  DCHECK(expected_loop_ == base::MessageLoop::current());

  // TODO(mgist): Add implementation

  return syncer::SyncError(FROM_HERE,
                           syncer::SyncError::DATATYPE_ERROR,
                           "Typed url syncable service is not implemented.",
                           syncer::TYPED_URLS);
}
126
OnUrlsModified(URLRows * changed_urls)127 void TypedUrlSyncableService::OnUrlsModified(URLRows* changed_urls) {
128 DCHECK(expected_loop_ == base::MessageLoop::current());
129 DCHECK(changed_urls);
130
131 if (processing_syncer_changes_)
132 return; // These are changes originating from us, ignore.
133 if (!sync_processor_.get())
134 return; // Sync processor not yet initialized, don't sync.
135
136 // Create SyncChangeList.
137 syncer::SyncChangeList changes;
138
139 for (URLRows::iterator url = changed_urls->begin();
140 url != changed_urls->end(); ++url) {
141 // Only care if the modified URL is typed.
142 if (url->typed_count() > 0) {
143 // If there were any errors updating the sync node, just ignore them and
144 // continue on to process the next URL.
145 CreateOrUpdateSyncNode(*url, &changes);
146 }
147 }
148
149 // Send SyncChangeList to server if there are any changes.
150 if (changes.size() > 0)
151 sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
152 }
153
OnUrlVisited(ui::PageTransition transition,URLRow * row)154 void TypedUrlSyncableService::OnUrlVisited(ui::PageTransition transition,
155 URLRow* row) {
156 DCHECK(expected_loop_ == base::MessageLoop::current());
157 DCHECK(row);
158
159 if (processing_syncer_changes_)
160 return; // These are changes originating from us, ignore.
161 if (!sync_processor_.get())
162 return; // Sync processor not yet initialized, don't sync.
163 if (!ShouldSyncVisit(transition, row))
164 return;
165
166 // Create SyncChangeList.
167 syncer::SyncChangeList changes;
168
169 CreateOrUpdateSyncNode(*row, &changes);
170
171 // Send SyncChangeList to server if there are any changes.
172 if (changes.size() > 0)
173 sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
174 }
175
OnUrlsDeleted(bool all_history,bool expired,URLRows * rows)176 void TypedUrlSyncableService::OnUrlsDeleted(bool all_history,
177 bool expired,
178 URLRows* rows) {
179 DCHECK(expected_loop_ == base::MessageLoop::current());
180
181 if (processing_syncer_changes_)
182 return; // These are changes originating from us, ignore.
183 if (!sync_processor_.get())
184 return; // Sync processor not yet initialized, don't sync.
185
186 // Ignore URLs expired due to old age (we don't want to sync them as deletions
187 // to avoid extra traffic up to the server, and also to make sure that a
188 // client with a bad clock setting won't go on an expiration rampage and
189 // delete all history from every client). The server will gracefully age out
190 // the sync DB entries when they've been idle for long enough.
191 if (expired)
192 return;
193
194 // Create SyncChangeList.
195 syncer::SyncChangeList changes;
196
197 if (all_history) {
198 // Delete all synced typed urls.
199 for (std::set<GURL>::const_iterator url = synced_typed_urls_.begin();
200 url != synced_typed_urls_.end(); ++url) {
201 VisitVector visits;
202 URLRow row(*url);
203 AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
204 row, visits, url->spec(), &changes);
205 }
206 // Clear cache of server state.
207 synced_typed_urls_.clear();
208 } else {
209 DCHECK(rows);
210 // Delete rows.
211 for (URLRows::const_iterator row = rows->begin();
212 row != rows->end(); ++row) {
213 // Add specifics to change list for all synced urls that were deleted.
214 if (synced_typed_urls_.find(row->url()) != synced_typed_urls_.end()) {
215 VisitVector visits;
216 AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE,
217 *row, visits, row->url().spec(), &changes);
218 // Delete typed url from cache.
219 synced_typed_urls_.erase(row->url());
220 }
221 }
222 }
223
224 // Send SyncChangeList to server if there are any changes.
225 if (changes.size() > 0)
226 sync_processor_->ProcessSyncChanges(FROM_HERE, changes);
227 }
228
ShouldIgnoreUrl(const GURL & url)229 bool TypedUrlSyncableService::ShouldIgnoreUrl(const GURL& url) {
230 // Ignore empty URLs. Not sure how this can happen (maybe import from other
231 // busted browsers, or misuse of the history API, or just plain bugs) but we
232 // can't deal with them.
233 if (url.spec().empty())
234 return true;
235
236 // Ignore local file URLs.
237 if (url.SchemeIsFile())
238 return true;
239
240 // Ignore localhost URLs.
241 if (net::IsLocalhost(url.host()))
242 return true;
243
244 return false;
245 }
246
ShouldSyncVisit(ui::PageTransition page_transition,URLRow * row)247 bool TypedUrlSyncableService::ShouldSyncVisit(
248 ui::PageTransition page_transition,
249 URLRow* row) {
250 if (!row)
251 return false;
252 int typed_count = row->typed_count();
253 ui::PageTransition transition = ui::PageTransitionFromInt(
254 page_transition & ui::PAGE_TRANSITION_CORE_MASK);
255
256 // Just use an ad-hoc criteria to determine whether to ignore this
257 // notification. For most users, the distribution of visits is roughly a bell
258 // curve with a long tail - there are lots of URLs with < 5 visits so we want
259 // to make sure we sync up every visit to ensure the proper ordering of
260 // suggestions. But there are relatively few URLs with > 10 visits, and those
261 // tend to be more broadly distributed such that there's no need to sync up
262 // every visit to preserve their relative ordering.
263 return (transition == ui::PAGE_TRANSITION_TYPED &&
264 typed_count > 0 &&
265 (typed_count < kTypedUrlVisitThrottleThreshold ||
266 (typed_count % kTypedUrlVisitThrottleMultiple) == 0));
267 }
268
CreateOrUpdateSyncNode(URLRow url,syncer::SyncChangeList * changes)269 bool TypedUrlSyncableService::CreateOrUpdateSyncNode(
270 URLRow url,
271 syncer::SyncChangeList* changes) {
272 DCHECK_GT(url.typed_count(), 0);
273
274 if (ShouldIgnoreUrl(url.url()))
275 return true;
276
277 // Get the visits for this node.
278 VisitVector visit_vector;
279 if (!FixupURLAndGetVisits(&url, &visit_vector)) {
280 DLOG(ERROR) << "Could not load visits for url: " << url.url();
281 return false;
282 }
283 DCHECK(!visit_vector.empty());
284
285 std::string title = url.url().spec();
286 syncer::SyncChange::SyncChangeType change_type;
287
288 // If server already has URL, then send a sync update, else add it.
289 change_type =
290 (synced_typed_urls_.find(url.url()) != synced_typed_urls_.end()) ?
291 syncer::SyncChange::ACTION_UPDATE :
292 syncer::SyncChange::ACTION_ADD;
293
294 // Ensure cache of server state is up to date.
295 synced_typed_urls_.insert(url.url());
296
297 AddTypedUrlToChangeList(change_type, url, visit_vector, title, changes);
298
299 return true;
300 }
301
AddTypedUrlToChangeList(syncer::SyncChange::SyncChangeType change_type,const URLRow & row,const VisitVector & visits,std::string title,syncer::SyncChangeList * change_list)302 void TypedUrlSyncableService::AddTypedUrlToChangeList(
303 syncer::SyncChange::SyncChangeType change_type,
304 const URLRow& row,
305 const VisitVector& visits,
306 std::string title,
307 syncer::SyncChangeList* change_list) {
308 sync_pb::EntitySpecifics entity_specifics;
309 sync_pb::TypedUrlSpecifics* typed_url = entity_specifics.mutable_typed_url();
310
311 if (change_type == syncer::SyncChange::ACTION_DELETE) {
312 typed_url->set_url(row.url().spec());
313 } else {
314 WriteToTypedUrlSpecifics(row, visits, typed_url);
315 }
316
317 change_list->push_back(
318 syncer::SyncChange(FROM_HERE, change_type,
319 syncer::SyncData::CreateLocalData(
320 kTypedUrlTag, title, entity_specifics)));
321 }
322
// Fills |typed_url| from |url| and |visits|: copies the URL spec, the title
// (converted to UTF-8) and the hidden flag, then appends the visit timestamps
// and transitions. RELOAD visits are never written, and at most
// kMaxTypedUrlVisits visits are kept:
//  - if there are more than kMaxTypedUrlVisits typed visits, only typed visits
//    are written and the earliest surplus ones are dropped;
//  - otherwise, if there are more than kMaxTypedUrlVisits non-RELOAD visits,
//    the earliest surplus non-typed visits are dropped and every typed visit
//    is kept.
// |visits| must be non-empty, ordered by non-decreasing visit time, and its
// last entry must match |url|'s last_visit (all enforced with DCHECKs). If no
// visit survives the filtering, a single RELOAD visit at |url|'s last_visit is
// written so that the visit array is never empty.
void TypedUrlSyncableService::WriteToTypedUrlSpecifics(
    const URLRow& url,
    const VisitVector& visits,
    sync_pb::TypedUrlSpecifics* typed_url) {

  DCHECK(!url.last_visit().is_null());
  DCHECK(!visits.empty());
  DCHECK_EQ(url.last_visit().ToInternalValue(),
            visits.back().visit_time.ToInternalValue());

  typed_url->set_url(url.url().spec());
  typed_url->set_title(base::UTF16ToUTF8(url.title()));
  typed_url->set_hidden(url.hidden());

  DCHECK(CheckVisitOrdering(visits));

  // |only_typed| is set when even the typed visits alone exceed the limit.
  // |skip_count| is the number of eligible visits, counted from the front of
  // |visits|, that must be dropped to get down to kMaxTypedUrlVisits.
  bool only_typed = false;
  int skip_count = 0;

  if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
    int typed_count = 0;
    int total = 0;
    // Walk the passed-in visit vector and count the # of typed visits.
    for (VisitVector::const_iterator visit = visits.begin();
         visit != visits.end(); ++visit) {
      ui::PageTransition transition = ui::PageTransitionFromInt(
          visit->transition & ui::PAGE_TRANSITION_CORE_MASK);
      // We ignore reload visits.
      if (transition == ui::PAGE_TRANSITION_RELOAD)
        continue;
      ++total;
      if (transition == ui::PAGE_TRANSITION_TYPED)
        ++typed_count;
    }
    // We should have at least one typed visit. This can sometimes happen if
    // the history DB has an inaccurate count for some reason (there's been
    // bugs in the history code in the past which has left users in the wild
    // with incorrect counts - http://crbug.com/84258).
    DCHECK(typed_count > 0);

    if (typed_count > kMaxTypedUrlVisits) {
      // Too many typed visits on their own: write typed visits only and drop
      // the surplus.
      only_typed = true;
      skip_count = typed_count - kMaxTypedUrlVisits;
    } else if (total > kMaxTypedUrlVisits) {
      // All typed visits fit; the surplus is taken out of the non-typed ones.
      skip_count = total - kMaxTypedUrlVisits;
    }
  }

  for (VisitVector::const_iterator visit = visits.begin();
       visit != visits.end(); ++visit) {
    ui::PageTransition transition =
        ui::PageTransitionStripQualifier(visit->transition);
    // Skip reload visits.
    if (transition == ui::PAGE_TRANSITION_RELOAD)
      continue;

    // If we only have room for typed visits, then only add typed visits.
    if (only_typed && transition != ui::PAGE_TRANSITION_TYPED)
      continue;

    if (skip_count > 0) {
      // We have too many entries to fit, so we need to skip the oldest ones.
      // Only skip typed URLs if there are too many typed URLs to fit.
      if (only_typed || transition != ui::PAGE_TRANSITION_TYPED) {
        --skip_count;
        continue;
      }
    }
    // The timestamp and the unstripped transition (qualifiers included) are
    // appended together, keeping the two repeated fields the same length.
    typed_url->add_visits(visit->visit_time.ToInternalValue());
    typed_url->add_visit_transitions(visit->transition);
  }
  // Every visit that was scheduled to be skipped must have been encountered.
  DCHECK_EQ(skip_count, 0);

  if (typed_url->visits_size() == 0) {
    // If we get here, it's because we don't actually have any TYPED visits
    // even though the visit's typed_count > 0 (corrupted typed_count). So
    // let's go ahead and add a RELOAD visit at the most recent visit since
    // it's not legal to have an empty visit array (yet another workaround
    // for http://crbug.com/84258).
    typed_url->add_visits(url.last_visit().ToInternalValue());
    typed_url->add_visit_transitions(ui::PAGE_TRANSITION_RELOAD);
  }
  // These invariants are enforced with CHECK rather than DCHECK: the visit
  // array is non-empty, within the size limit, and parallel to the
  // transitions array.
  CHECK_GT(typed_url->visits_size(), 0);
  CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
  CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
}
409
FixupURLAndGetVisits(URLRow * url,VisitVector * visits)410 bool TypedUrlSyncableService::FixupURLAndGetVisits(
411 URLRow* url,
412 VisitVector* visits) {
413 ++num_db_accesses_;
414 CHECK(history_backend_);
415 if (!history_backend_->GetMostRecentVisitsForURL(
416 url->id(), kMaxVisitsToFetch, visits)) {
417 ++num_db_errors_;
418 return false;
419 }
420
421 // Sometimes (due to a bug elsewhere in the history or sync code, or due to
422 // a crash between adding a URL to the history database and updating the
423 // visit DB) the visit vector for a URL can be empty. If this happens, just
424 // create a new visit whose timestamp is the same as the last_visit time.
425 // This is a workaround for http://crbug.com/84258.
426 if (visits->empty()) {
427 DVLOG(1) << "Found empty visits for URL: " << url->url();
428 VisitRow visit(
429 url->id(), url->last_visit(), 0, ui::PAGE_TRANSITION_TYPED, 0);
430 visits->push_back(visit);
431 }
432
433 // GetMostRecentVisitsForURL() returns the data in the opposite order that
434 // we need it, so reverse it.
435 std::reverse(visits->begin(), visits->end());
436
437 // Sometimes, the last_visit field in the URL doesn't match the timestamp of
438 // the last visit in our visit array (they come from different tables, so
439 // crashes/bugs can cause them to mismatch), so just set it here.
440 url->set_last_visit(visits->back().visit_time);
441 DCHECK(CheckVisitOrdering(*visits));
442 return true;
443 }
444
445 } // namespace history
446