// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #include "base/utf_offset_string_conversions.h"
6 
7 #include "base/string_piece.h"
8 #include "base/utf_string_conversion_utils.h"
9 
10 using base::PrepareForUTF16Or32Output;
11 using base::ReadUnicodeCharacter;
12 using base::WriteUnicodeCharacter;
13 
14 // Generalized Unicode converter -----------------------------------------------
15 
16 // Converts the given source Unicode character type to the given destination
17 // Unicode character type as a STL string. The given input buffer and size
18 // determine the source, and the given output STL string will be replaced by
19 // the result.
20 template<typename SRC_CHAR>
ConvertUnicode(const SRC_CHAR * src,size_t src_len,std::wstring * output,size_t * offset_for_adjustment)21 bool ConvertUnicode(const SRC_CHAR* src,
22                     size_t src_len,
23                     std::wstring* output,
24                     size_t* offset_for_adjustment) {
25   size_t output_offset =
26       (offset_for_adjustment && *offset_for_adjustment < src_len) ?
27           *offset_for_adjustment : std::wstring::npos;
28 
29   // ICU requires 32-bit numbers.
30   bool success = true;
31   int32 src_len32 = static_cast<int32>(src_len);
32   for (int32 i = 0; i < src_len32; i++) {
33     uint32 code_point;
34     size_t original_i = i;
35     size_t chars_written = 0;
36     if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
37       chars_written = WriteUnicodeCharacter(code_point, output);
38     } else {
39       chars_written = WriteUnicodeCharacter(0xFFFD, output);
40       success = false;
41     }
42     if ((output_offset != std::wstring::npos) &&
43         (*offset_for_adjustment > original_i)) {
44       // NOTE: ReadUnicodeCharacter() adjusts |i| to point _at_ the last
45       // character read, not after it (so that incrementing it in the loop
46       // increment will place it at the right location), so we need to account
47       // for that in determining the amount that was read.
48       if (*offset_for_adjustment <= static_cast<size_t>(i))
49         output_offset = std::wstring::npos;
50       else
51         output_offset += chars_written - (i - original_i + 1);
52     }
53   }
54 
55   if (offset_for_adjustment)
56     *offset_for_adjustment = output_offset;
57   return success;
58 }
59 
60 // UTF-8 <-> Wide --------------------------------------------------------------
61 
UTF8ToWideAndAdjustOffset(const char * src,size_t src_len,std::wstring * output,size_t * offset_for_adjustment)62 bool UTF8ToWideAndAdjustOffset(const char* src,
63                                size_t src_len,
64                                std::wstring* output,
65                                size_t* offset_for_adjustment) {
66   PrepareForUTF16Or32Output(src, src_len, output);
67   return ConvertUnicode(src, src_len, output, offset_for_adjustment);
68 }
69 
UTF8ToWideAndAdjustOffset(const base::StringPiece & utf8,size_t * offset_for_adjustment)70 std::wstring UTF8ToWideAndAdjustOffset(const base::StringPiece& utf8,
71                                        size_t* offset_for_adjustment) {
72   std::wstring ret;
73   UTF8ToWideAndAdjustOffset(utf8.data(), utf8.length(), &ret,
74                             offset_for_adjustment);
75   return ret;
76 }
77 
78 // UTF-16 <-> Wide -------------------------------------------------------------
79 
80 #if defined(WCHAR_T_IS_UTF16)
81 
82 // When wide == UTF-16, then conversions are a NOP.
UTF16ToWideAndAdjustOffset(const char16 * src,size_t src_len,std::wstring * output,size_t * offset_for_adjustment)83 bool UTF16ToWideAndAdjustOffset(const char16* src,
84                                 size_t src_len,
85                                 std::wstring* output,
86                                 size_t* offset_for_adjustment) {
87   output->assign(src, src_len);
88   if (offset_for_adjustment && (*offset_for_adjustment >= src_len))
89     *offset_for_adjustment = std::wstring::npos;
90   return true;
91 }
92 
UTF16ToWideAndAdjustOffset(const string16 & utf16,size_t * offset_for_adjustment)93 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
94                                         size_t* offset_for_adjustment) {
95   if (offset_for_adjustment && (*offset_for_adjustment >= utf16.length()))
96     *offset_for_adjustment = std::wstring::npos;
97   return utf16;
98 }
99 
100 #elif defined(WCHAR_T_IS_UTF32)
101 
UTF16ToWideAndAdjustOffset(const char16 * src,size_t src_len,std::wstring * output,size_t * offset_for_adjustment)102 bool UTF16ToWideAndAdjustOffset(const char16* src,
103                                 size_t src_len,
104                                 std::wstring* output,
105                                 size_t* offset_for_adjustment) {
106   output->clear();
107   // Assume that normally we won't have any non-BMP characters so the counts
108   // will be the same.
109   output->reserve(src_len);
110   return ConvertUnicode(src, src_len, output, offset_for_adjustment);
111 }
112 
UTF16ToWideAndAdjustOffset(const string16 & utf16,size_t * offset_for_adjustment)113 std::wstring UTF16ToWideAndAdjustOffset(const string16& utf16,
114                                         size_t* offset_for_adjustment) {
115   std::wstring ret;
116   UTF16ToWideAndAdjustOffset(utf16.data(), utf16.length(), &ret,
117                              offset_for_adjustment);
118   return ret;
119 }
120 
121 #endif  // defined(WCHAR_T_IS_UTF32)
122