// Copyright 2014 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "cpp/src/util/canonicalize_string.h" #include "base/logging.h" #include "cpp/include/libaddressinput/util/scoped_ptr.h" #include "third_party/icu/source/common/unicode/errorcode.h" #include "third_party/icu/source/common/unicode/locid.h" #include "third_party/icu/source/common/unicode/unistr.h" #include "third_party/icu/source/common/unicode/utypes.h" #include "third_party/icu/source/i18n/unicode/coll.h" namespace i18n { namespace addressinput { namespace { class ChromeStringCanonicalizer : public StringCanonicalizer { public: ChromeStringCanonicalizer() : error_code_(U_ZERO_ERROR), collator_( icu::Collator::createInstance( icu::Locale::getRoot(), error_code_)) { collator_->setStrength(icu::Collator::PRIMARY); DCHECK(U_SUCCESS(error_code_)); } virtual ~ChromeStringCanonicalizer() {} // StringCanonicalizer implementation. virtual std::string CanonicalizeString(const std::string& original) { // Returns a canonical version of the string that can be used for comparing // strings regardless of diacritics and capitalization. // CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas"); // CanonicalizeString("Texas") == CanonicalizeString("teXas"); // CanonicalizeString("Texas") != CanonicalizeString("California"); // // The output is not human-readable. // CanonicalizeString("Texas") != "Texas"; icu::UnicodeString icu_str( original.c_str(), static_cast(original.length())); int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0); scoped_ptr buffer(new uint8_t[buffer_size]); DCHECK(buffer.get()); int32_t filled_size = collator_->getSortKey(icu_str, buffer.get(), buffer_size); DCHECK_EQ(buffer_size, filled_size); return std::string(reinterpret_cast(buffer.get())); } private: UErrorCode error_code_; scoped_ptr collator_; DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer); }; } // namespace // static scoped_ptr StringCanonicalizer::Build() { return scoped_ptr(new ChromeStringCanonicalizer); } } // namespace addressinput } // namespace i18n