• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "cpp/src/util/canonicalize_string.h"
6 
7 #include "base/logging.h"
8 #include "cpp/include/libaddressinput/util/scoped_ptr.h"
9 #include "third_party/icu/source/common/unicode/errorcode.h"
10 #include "third_party/icu/source/common/unicode/locid.h"
11 #include "third_party/icu/source/common/unicode/unistr.h"
12 #include "third_party/icu/source/common/unicode/utypes.h"
13 #include "third_party/icu/source/i18n/unicode/coll.h"
14 
15 namespace i18n {
16 namespace addressinput {
17 
18 namespace {
19 
20 class ChromeStringCanonicalizer : public StringCanonicalizer {
21  public:
ChromeStringCanonicalizer()22   ChromeStringCanonicalizer()
23       : error_code_(U_ZERO_ERROR),
24         collator_(
25             icu::Collator::createInstance(
26                 icu::Locale::getRoot(), error_code_)) {
27     collator_->setStrength(icu::Collator::PRIMARY);
28     DCHECK(U_SUCCESS(error_code_));
29   }
30 
~ChromeStringCanonicalizer()31   virtual ~ChromeStringCanonicalizer() {}
32 
33   // StringCanonicalizer implementation.
CanonicalizeString(const std::string & original)34   virtual std::string CanonicalizeString(const std::string& original) {
35     // Returns a canonical version of the string that can be used for comparing
36     // strings regardless of diacritics and capitalization.
37     //    CanonicalizeString("Texas") == CanonicalizeString("T\u00E9xas");
38     //    CanonicalizeString("Texas") == CanonicalizeString("teXas");
39     //    CanonicalizeString("Texas") != CanonicalizeString("California");
40     //
41     // The output is not human-readable.
42     //    CanonicalizeString("Texas") != "Texas";
43     icu::UnicodeString icu_str(
44         original.c_str(), static_cast<int32_t>(original.length()));
45     int32_t buffer_size = collator_->getSortKey(icu_str, NULL, 0);
46     scoped_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
47     DCHECK(buffer.get());
48     int32_t filled_size =
49         collator_->getSortKey(icu_str, buffer.get(), buffer_size);
50     DCHECK_EQ(buffer_size, filled_size);
51     return std::string(reinterpret_cast<const char*>(buffer.get()));
52   }
53 
54  private:
55   UErrorCode error_code_;
56   scoped_ptr<icu::Collator> collator_;
57 
58   DISALLOW_COPY_AND_ASSIGN(ChromeStringCanonicalizer);
59 };
60 
61 }  // namespace
62 
63 // static
Build()64 scoped_ptr<StringCanonicalizer> StringCanonicalizer::Build() {
65   return scoped_ptr<StringCanonicalizer>(new ChromeStringCanonicalizer);
66 }
67 
68 }  // namespace addressinput
69 }  // namespace i18n
70