1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
6
7 #include <functional>
8
9 #include "base/file_util.h"
10 #include "base/files/important_file_writer.h"
11 #include "base/md5.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
15 #include "chrome/common/chrome_constants.h"
16 #include "chrome/common/spellcheck_messages.h"
17 #include "content/public/browser/browser_thread.h"
18 #include "sync/api/sync_change.h"
19 #include "sync/api/sync_data.h"
20 #include "sync/api/sync_error_factory.h"
21 #include "sync/protocol/sync.pb.h"
22
23 using content::BrowserThread;
24 using chrome::spellcheck_common::WordList;
25 using chrome::spellcheck_common::WordSet;
26
27 namespace {
28
29 // Filename extension for backup dictionary file.
30 const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup");
31
32 // Prefix for the checksum in the dictionary file.
33 const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
34
// Outcome of verifying the checksum stored in a custom spellcheck dictionary
// file.
enum ChecksumStatus {
  // The stored checksum matches the contents, or no checksum is stored.
  VALID_CHECKSUM = 0,

  // The stored checksum does not match the contents.
  INVALID_CHECKSUM = 1,
};
40
// Flags describing what a sanitation pass found in a dictionary change. The
// non-zero values are distinct bits, so several can be OR-ed into one int.
enum ChangeSanitationResult {
  // Nothing had to be dropped; the change can be applied as-is.
  VALID_CHANGE = 0,

  // Some words to be added were not valid words.
  DETECTED_INVALID_WORDS = 1 << 0,

  // Some words to be added were already in the dictionary.
  DETECTED_DUPLICATE_WORDS = 1 << 1,

  // Some words to be removed were not in the dictionary.
  DETECTED_MISSING_WORDS = 1 << 2,
};
55
56 // Loads the file at |file_path| into the |words| container. If the file has a
57 // valid checksum, then returns ChecksumStatus::VALID. If the file has an
58 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
LoadFile(const base::FilePath & file_path,WordList & words)59 ChecksumStatus LoadFile(const base::FilePath& file_path, WordList& words) {
60 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
61 words.clear();
62 std::string contents;
63 base::ReadFileToString(file_path, &contents);
64 size_t pos = contents.rfind(CHECKSUM_PREFIX);
65 if (pos != std::string::npos) {
66 std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX));
67 contents = contents.substr(0, pos);
68 if (checksum != base::MD5String(contents))
69 return INVALID_CHECKSUM;
70 }
71 TrimWhitespaceASCII(contents, TRIM_ALL, &contents);
72 base::SplitString(contents, '\n', &words);
73 return VALID_CHECKSUM;
74 }
75
76 // Returns true for invalid words and false for valid words.
IsInvalidWord(const std::string & word)77 bool IsInvalidWord(const std::string& word) {
78 std::string tmp;
79 return !IsStringUTF8(word) ||
80 word.length() >
81 chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES ||
82 word.empty() ||
83 TRIM_NONE != TrimWhitespaceASCII(word, TRIM_ALL, &tmp);
84 }
85
86 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
87 // the dictionary checksum is not valid, but backup checksum is valid, then
88 // restores the backup and loads that into |custom_words| instead. If the backup
89 // is invalid too, then clears |custom_words|. Must be called on the file
90 // thread.
LoadDictionaryFileReliably(WordList & custom_words,const base::FilePath & path)91 void LoadDictionaryFileReliably(WordList& custom_words,
92 const base::FilePath& path) {
93 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
94 // Load the contents and verify the checksum.
95 if (LoadFile(path, custom_words) == VALID_CHECKSUM)
96 return;
97 // Checksum is not valid. See if there's a backup.
98 base::FilePath backup = path.AddExtension(BACKUP_EXTENSION);
99 if (!base::PathExists(backup))
100 return;
101 // Load the backup and verify its checksum.
102 if (LoadFile(backup, custom_words) != VALID_CHECKSUM)
103 return;
104 // Backup checksum is valid. Restore the backup.
105 base::CopyFile(backup, path);
106 }
107
108 // Backs up the original dictionary, saves |custom_words| and its checksum into
109 // the custom spellcheck dictionary at |path|.
SaveDictionaryFileReliably(const WordList & custom_words,const base::FilePath & path)110 void SaveDictionaryFileReliably(
111 const WordList& custom_words,
112 const base::FilePath& path) {
113 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
114 std::stringstream content;
115 for (WordList::const_iterator it = custom_words.begin();
116 it != custom_words.end();
117 ++it) {
118 content << *it << '\n';
119 }
120 std::string checksum = base::MD5String(content.str());
121 content << CHECKSUM_PREFIX << checksum;
122 base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION));
123 base::ImportantFileWriter::WriteFileAtomically(path, content.str());
124 }
125
126 // Removes duplicate and invalid words from |to_add| word list and sorts it.
127 // Looks for duplicates in both |to_add| and |existing| word lists. Returns a
128 // bitmap of |ChangeSanitationResult| values.
SanitizeWordsToAdd(const WordSet & existing,WordList & to_add)129 int SanitizeWordsToAdd(const WordSet& existing, WordList& to_add) {
130 // Do not add duplicate words.
131 std::sort(to_add.begin(), to_add.end());
132 WordList new_words = base::STLSetDifference<WordList>(to_add, existing);
133 new_words.erase(std::unique(new_words.begin(), new_words.end()),
134 new_words.end());
135 int result = VALID_CHANGE;
136 if (to_add.size() != new_words.size())
137 result |= DETECTED_DUPLICATE_WORDS;
138 // Do not add invalid words.
139 size_t size = new_words.size();
140 new_words.erase(std::remove_if(new_words.begin(),
141 new_words.end(),
142 IsInvalidWord),
143 new_words.end());
144 if (size != new_words.size())
145 result |= DETECTED_INVALID_WORDS;
146 // Save the sanitized words to be added.
147 std::swap(to_add, new_words);
148 return result;
149 }
150
151 // Removes word from |to_remove| that are missing from |existing| word list and
152 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
SanitizeWordsToRemove(const WordSet & existing,WordList & to_remove)153 int SanitizeWordsToRemove(const WordSet& existing, WordList& to_remove) {
154 // Do not remove words that are missing from the dictionary.
155 std::sort(to_remove.begin(), to_remove.end());
156 WordList found_words;
157 std::set_intersection(existing.begin(),
158 existing.end(),
159 to_remove.begin(),
160 to_remove.end(),
161 std::back_inserter(found_words));
162 int result = VALID_CHANGE;
163 if (to_remove.size() > found_words.size())
164 result |= DETECTED_MISSING_WORDS;
165 // Save the sanitized words to be removed.
166 std::swap(to_remove, found_words);
167 return result;
168 }
169
170 } // namespace
171
172
Change()173 SpellcheckCustomDictionary::Change::Change() {
174 }
175
// Copies both the add list and the remove list of |other|.
SpellcheckCustomDictionary::Change::Change(
    const SpellcheckCustomDictionary::Change& other)
    : to_add_(other.to_add_),
      to_remove_(other.to_remove_) {
}
181
Change(const WordList & to_add)182 SpellcheckCustomDictionary::Change::Change(const WordList& to_add)
183 : to_add_(to_add) {
184 }
185
~Change()186 SpellcheckCustomDictionary::Change::~Change() {
187 }
188
AddWord(const std::string & word)189 void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) {
190 to_add_.push_back(word);
191 }
192
RemoveWord(const std::string & word)193 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) {
194 to_remove_.push_back(word);
195 }
196
Sanitize(const WordSet & words)197 int SpellcheckCustomDictionary::Change::Sanitize(const WordSet& words) {
198 int result = VALID_CHANGE;
199 if (!to_add_.empty())
200 result |= SanitizeWordsToAdd(words, to_add_);
201 if (!to_remove_.empty())
202 result |= SanitizeWordsToRemove(words, to_remove_);
203 return result;
204 }
205
to_add() const206 const WordList& SpellcheckCustomDictionary::Change::to_add() const {
207 return to_add_;
208 }
209
to_remove() const210 const WordList& SpellcheckCustomDictionary::Change::to_remove() const {
211 return to_remove_;
212 }
213
empty() const214 bool SpellcheckCustomDictionary::Change::empty() const {
215 return to_add_.empty() && to_remove_.empty();
216 }
217
SpellcheckCustomDictionary(const base::FilePath & path)218 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
219 const base::FilePath& path)
220 : custom_dictionary_path_(),
221 is_loaded_(false),
222 weak_ptr_factory_(this) {
223 custom_dictionary_path_ =
224 path.Append(chrome::kCustomDictionaryFileName);
225 }
226
~SpellcheckCustomDictionary()227 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
228 }
229
GetWords() const230 const WordSet& SpellcheckCustomDictionary::GetWords() const {
231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
232 return words_;
233 }
234
AddWord(const std::string & word)235 bool SpellcheckCustomDictionary::AddWord(const std::string& word) {
236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
237 Change dictionary_change;
238 dictionary_change.AddWord(word);
239 int result = dictionary_change.Sanitize(GetWords());
240 Apply(dictionary_change);
241 Notify(dictionary_change);
242 Sync(dictionary_change);
243 Save(dictionary_change);
244 return result == VALID_CHANGE;
245 }
246
RemoveWord(const std::string & word)247 bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) {
248 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
249 Change dictionary_change;
250 dictionary_change.RemoveWord(word);
251 int result = dictionary_change.Sanitize(GetWords());
252 Apply(dictionary_change);
253 Notify(dictionary_change);
254 Sync(dictionary_change);
255 Save(dictionary_change);
256 return result == VALID_CHANGE;
257 }
258
HasWord(const std::string & word) const259 bool SpellcheckCustomDictionary::HasWord(const std::string& word) const {
260 return !!words_.count(word);
261 }
262
AddObserver(Observer * observer)263 void SpellcheckCustomDictionary::AddObserver(Observer* observer) {
264 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
265 observers_.AddObserver(observer);
266 }
267
RemoveObserver(Observer * observer)268 void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) {
269 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
270 observers_.RemoveObserver(observer);
271 }
272
IsLoaded()273 bool SpellcheckCustomDictionary::IsLoaded() {
274 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
275 return is_loaded_;
276 }
277
IsSyncing()278 bool SpellcheckCustomDictionary::IsSyncing() {
279 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
280 return !!sync_processor_.get();
281 }
282
Load()283 void SpellcheckCustomDictionary::Load() {
284 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
285 BrowserThread::PostTaskAndReplyWithResult(
286 BrowserThread::FILE,
287 FROM_HERE,
288 base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile,
289 custom_dictionary_path_),
290 base::Bind(&SpellcheckCustomDictionary::OnLoaded,
291 weak_ptr_factory_.GetWeakPtr()));
292 }
293
MergeDataAndStartSyncing(syncer::ModelType type,const syncer::SyncDataList & initial_sync_data,scoped_ptr<syncer::SyncChangeProcessor> sync_processor,scoped_ptr<syncer::SyncErrorFactory> sync_error_handler)294 syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing(
295 syncer::ModelType type,
296 const syncer::SyncDataList& initial_sync_data,
297 scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
298 scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) {
299 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
300 DCHECK(!sync_processor_.get());
301 DCHECK(!sync_error_handler_.get());
302 DCHECK(sync_processor.get());
303 DCHECK(sync_error_handler.get());
304 DCHECK_EQ(syncer::DICTIONARY, type);
305 sync_processor_ = sync_processor.Pass();
306 sync_error_handler_ = sync_error_handler.Pass();
307
308 // Build a list of words to add locally.
309 WordList to_add_locally;
310 for (syncer::SyncDataList::const_iterator it = initial_sync_data.begin();
311 it != initial_sync_data.end();
312 ++it) {
313 DCHECK_EQ(syncer::DICTIONARY, it->GetDataType());
314 to_add_locally.push_back(it->GetSpecifics().dictionary().word());
315 }
316
317 // Add remote words locally.
318 Change to_change_locally(to_add_locally);
319 to_change_locally.Sanitize(GetWords());
320 Apply(to_change_locally);
321 Notify(to_change_locally);
322 Save(to_change_locally);
323
324 // Add as many as possible local words remotely.
325 std::sort(to_add_locally.begin(), to_add_locally.end());
326 WordList to_add_remotely = base::STLSetDifference<WordList>(words_,
327 to_add_locally);
328
329 // Send local changes to the sync server.
330 Change to_change_remotely(to_add_remotely);
331 syncer::SyncMergeResult result(type);
332 result.set_error(Sync(to_change_remotely));
333 return result;
334 }
335
StopSyncing(syncer::ModelType type)336 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) {
337 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
338 DCHECK_EQ(syncer::DICTIONARY, type);
339 sync_processor_.reset();
340 sync_error_handler_.reset();
341 }
342
GetAllSyncData(syncer::ModelType type) const343 syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData(
344 syncer::ModelType type) const {
345 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
346 DCHECK_EQ(syncer::DICTIONARY, type);
347 syncer::SyncDataList data;
348 std::string word;
349 size_t i = 0;
350 for (WordSet::const_iterator it = words_.begin();
351 it != words_.end() &&
352 i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS;
353 ++it, ++i) {
354 word = *it;
355 sync_pb::EntitySpecifics specifics;
356 specifics.mutable_dictionary()->set_word(word);
357 data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics));
358 }
359 return data;
360 }
361
ProcessSyncChanges(const tracked_objects::Location & from_here,const syncer::SyncChangeList & change_list)362 syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges(
363 const tracked_objects::Location& from_here,
364 const syncer::SyncChangeList& change_list) {
365 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
366 Change dictionary_change;
367 for (syncer::SyncChangeList::const_iterator it = change_list.begin();
368 it != change_list.end();
369 ++it) {
370 DCHECK(it->IsValid());
371 std::string word = it->sync_data().GetSpecifics().dictionary().word();
372 switch (it->change_type()) {
373 case syncer::SyncChange::ACTION_ADD:
374 dictionary_change.AddWord(word);
375 break;
376 case syncer::SyncChange::ACTION_DELETE:
377 dictionary_change.RemoveWord(word);
378 break;
379 default:
380 return sync_error_handler_->CreateAndUploadError(
381 FROM_HERE,
382 "Processing sync changes failed on change type " +
383 syncer::SyncChange::ChangeTypeToString(it->change_type()));
384 }
385 }
386
387 dictionary_change.Sanitize(GetWords());
388 Apply(dictionary_change);
389 Notify(dictionary_change);
390 Save(dictionary_change);
391
392 return syncer::SyncError();
393 }
394
395 // static
LoadDictionaryFile(const base::FilePath & path)396 WordList SpellcheckCustomDictionary::LoadDictionaryFile(
397 const base::FilePath& path) {
398 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
399 WordList words;
400 LoadDictionaryFileReliably(words, path);
401 if (!words.empty() && VALID_CHANGE != SanitizeWordsToAdd(WordSet(), words))
402 SaveDictionaryFileReliably(words, path);
403 SpellCheckHostMetrics::RecordCustomWordCountStats(words.size());
404 return words;
405 }
406
407 // static
UpdateDictionaryFile(const SpellcheckCustomDictionary::Change & dictionary_change,const base::FilePath & path)408 void SpellcheckCustomDictionary::UpdateDictionaryFile(
409 const SpellcheckCustomDictionary::Change& dictionary_change,
410 const base::FilePath& path) {
411 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
412 if (dictionary_change.empty())
413 return;
414
415 WordList custom_words;
416 LoadDictionaryFileReliably(custom_words, path);
417
418 // Add words.
419 custom_words.insert(custom_words.end(),
420 dictionary_change.to_add().begin(),
421 dictionary_change.to_add().end());
422
423 // Remove words.
424 std::sort(custom_words.begin(), custom_words.end());
425 WordList remaining =
426 base::STLSetDifference<WordList>(custom_words,
427 dictionary_change.to_remove());
428 std::swap(custom_words, remaining);
429
430 SaveDictionaryFileReliably(custom_words, path);
431 }
432
OnLoaded(WordList custom_words)433 void SpellcheckCustomDictionary::OnLoaded(WordList custom_words) {
434 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
435 Change dictionary_change(custom_words);
436 dictionary_change.Sanitize(GetWords());
437 Apply(dictionary_change);
438 Sync(dictionary_change);
439 is_loaded_ = true;
440 FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded());
441 }
442
Apply(const SpellcheckCustomDictionary::Change & dictionary_change)443 void SpellcheckCustomDictionary::Apply(
444 const SpellcheckCustomDictionary::Change& dictionary_change) {
445 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
446 if (!dictionary_change.to_add().empty()) {
447 words_.insert(dictionary_change.to_add().begin(),
448 dictionary_change.to_add().end());
449 }
450 if (!dictionary_change.to_remove().empty()) {
451 WordSet updated_words =
452 base::STLSetDifference<WordSet>(words_,
453 dictionary_change.to_remove());
454 std::swap(words_, updated_words);
455 }
456 }
457
Save(const SpellcheckCustomDictionary::Change & dictionary_change)458 void SpellcheckCustomDictionary::Save(
459 const SpellcheckCustomDictionary::Change& dictionary_change) {
460 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
461 BrowserThread::PostTask(
462 BrowserThread::FILE,
463 FROM_HERE,
464 base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile,
465 dictionary_change,
466 custom_dictionary_path_));
467 }
468
Sync(const SpellcheckCustomDictionary::Change & dictionary_change)469 syncer::SyncError SpellcheckCustomDictionary::Sync(
470 const SpellcheckCustomDictionary::Change& dictionary_change) {
471 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
472 syncer::SyncError error;
473 if (!IsSyncing() || dictionary_change.empty())
474 return error;
475
476 // The number of words on the sync server should not exceed the limits.
477 int server_size = static_cast<int>(words_.size()) -
478 static_cast<int>(dictionary_change.to_add().size());
479 int max_upload_size = std::max(
480 0,
481 static_cast<int>(
482 chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) -
483 server_size);
484 int upload_size = std::min(
485 static_cast<int>(dictionary_change.to_add().size()),
486 max_upload_size);
487
488 syncer::SyncChangeList sync_change_list;
489 int i = 0;
490
491 for (WordList::const_iterator it = dictionary_change.to_add().begin();
492 it != dictionary_change.to_add().end() && i < upload_size;
493 ++it, ++i) {
494 std::string word = *it;
495 sync_pb::EntitySpecifics specifics;
496 specifics.mutable_dictionary()->set_word(word);
497 sync_change_list.push_back(syncer::SyncChange(
498 FROM_HERE,
499 syncer::SyncChange::ACTION_ADD,
500 syncer::SyncData::CreateLocalData(word, word, specifics)));
501 }
502
503 for (WordList::const_iterator it = dictionary_change.to_remove().begin();
504 it != dictionary_change.to_remove().end();
505 ++it) {
506 std::string word = *it;
507 sync_pb::EntitySpecifics specifics;
508 specifics.mutable_dictionary()->set_word(word);
509 sync_change_list.push_back(syncer::SyncChange(
510 FROM_HERE,
511 syncer::SyncChange::ACTION_DELETE,
512 syncer::SyncData::CreateLocalData(word, word, specifics)));
513 }
514
515 // Send the changes to the sync processor.
516 error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list);
517 if (error.IsSet())
518 return error;
519
520 // Turn off syncing of this dictionary if the server already has the maximum
521 // number of words.
522 if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS)
523 StopSyncing(syncer::DICTIONARY);
524
525 return error;
526 }
527
Notify(const SpellcheckCustomDictionary::Change & dictionary_change)528 void SpellcheckCustomDictionary::Notify(
529 const SpellcheckCustomDictionary::Change& dictionary_change) {
530 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
531 if (!IsLoaded() || dictionary_change.empty())
532 return;
533 FOR_EACH_OBSERVER(Observer,
534 observers_,
535 OnCustomDictionaryChanged(dictionary_change));
536 }
537