1 // Copyright 2014 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "cpp/src/util/canonicalize_string.h" 6 7 #include "base/logging.h" 8 #include "cpp/include/libaddressinput/util/scoped_ptr.h" 9 #include "third_party/icu/source/common/unicode/errorcode.h" 10 #include "third_party/icu/source/common/unicode/locid.h" 11 #include "third_party/icu/source/common/unicode/unistr.h" 12 #include "third_party/icu/source/common/unicode/utypes.h" 13 #include "third_party/icu/source/i18n/unicode/coll.h" 14 15 namespace i18n { 16 namespace addressinput { 17 18 namespace { 19 20 class ChromeStringCanonicalizer : public StringCanonicalizer { 21 public: ChromeStringCanonicalizer()22 ChromeStringCanonicalizer() 23 : error_code_(U_ZERO_ERROR), 24 collator_( 25 icu::Collator::createInstance( 26 icu::Locale::getRoot(), error_code_)) { 27 collator_->setStrength(icu::Collator::PRIMARY); 28 DCHECK(U_SUCCESS(error_code_)); 29 } 30 ~ChromeStringCanonicalizer()31 virtual ~ChromeStringCanonicalizer() {} 32 33 // StringCanonicalizer implementation. CanonicalizeString(const std::string & original)34 virtual std::string CanonicalizeString(const std::string& original) { 35 // Returns a canonical version of the string that can be used for comparing 36 // strings regardless of diacritics and capitalization. 37 // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); 38 // CanonicalizeString("Texas") == CanonicalizeString("teXas"); 39 // CanonicalizeString("Texas") != CanonicalizeString("California"); 40 // 41 // The output is not human-readable. 42 // CanonicalizeString("Texas") != "Texas"; 43 icu::UnicodeString icu_str( 44 original.c_str(), static_cast<int32_t>(original.length())); 45 int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); 46 scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]); 47 DCHECK(buffer.get()); 48 int32_t filled_size = 49 collator_->getSortKey(icu_str, buffer.get(), buffer_size); 50 DCHECK_EQ(buffer_size, filled_size); 51 return std::string(reinterpret_cast<const char*>(buffer.get())); 52 } 53 54 private: 55 UErrorCode error_code_; 56 scoped_ptr<icu::Collator> collator_; 57 58 DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer); 59 }; 60 61 } // namespace 62 63 // static Build()64scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() { 65 return scoped_ptr<StringCanonicalizer>(new ChromeStringCanonicalizer); 66 } 67 68 } // namespace addressinput 69 } // namespace i18n 70