1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 #include <string.h>
31
32 #include "common/convert_UTF.h"
33 #include "common/scoped_ptr.h"
34 #include "common/string_conversion.h"
35 #include "common/using_std_string.h"
36
37 namespace google_breakpad {
38
39 using std::vector;
40
UTF8ToUTF16(const char * in,vector<uint16_t> * out)41 void UTF8ToUTF16(const char *in, vector<uint16_t> *out) {
42 size_t source_length = strlen(in);
43 const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in);
44 const UTF8 *source_end_ptr = source_ptr + source_length;
45 // Erase the contents and zero fill to the expected size
46 out->clear();
47 out->insert(out->begin(), source_length, 0);
48 uint16_t *target_ptr = &(*out)[0];
49 uint16_t *target_end_ptr = target_ptr + out->capacity();
50 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
51 &target_ptr, target_end_ptr,
52 strictConversion);
53
54 // Resize to be the size of the # of converted characters + NULL
55 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
56 }
57
UTF8ToUTF16Char(const char * in,int in_length,uint16_t out[2])58 int UTF8ToUTF16Char(const char *in, int in_length, uint16_t out[2]) {
59 const UTF8 *source_ptr = reinterpret_cast<const UTF8 *>(in);
60 const UTF8 *source_end_ptr = source_ptr + 1;
61 uint16_t *target_ptr = out;
62 uint16_t *target_end_ptr = target_ptr + 2;
63 out[0] = out[1] = 0;
64
65 // Process one character at a time
66 while (1) {
67 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
68 &target_ptr, target_end_ptr,
69 strictConversion);
70
71 if (result == conversionOK)
72 return static_cast<int>(source_ptr - reinterpret_cast<const UTF8 *>(in));
73
74 // Add another character to the input stream and try again
75 source_ptr = reinterpret_cast<const UTF8 *>(in);
76 ++source_end_ptr;
77
78 if (source_end_ptr > reinterpret_cast<const UTF8 *>(in) + in_length)
79 break;
80 }
81
82 return 0;
83 }
84
UTF32ToUTF16(const wchar_t * in,vector<uint16_t> * out)85 void UTF32ToUTF16(const wchar_t *in, vector<uint16_t> *out) {
86 size_t source_length = wcslen(in);
87 const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(in);
88 const UTF32 *source_end_ptr = source_ptr + source_length;
89 // Erase the contents and zero fill to the expected size
90 out->clear();
91 out->insert(out->begin(), source_length, 0);
92 uint16_t *target_ptr = &(*out)[0];
93 uint16_t *target_end_ptr = target_ptr + out->capacity();
94 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
95 &target_ptr, target_end_ptr,
96 strictConversion);
97
98 // Resize to be the size of the # of converted characters + NULL
99 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
100 }
101
UTF32ToUTF16Char(wchar_t in,uint16_t out[2])102 void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) {
103 const UTF32 *source_ptr = reinterpret_cast<const UTF32 *>(&in);
104 const UTF32 *source_end_ptr = source_ptr + 1;
105 uint16_t *target_ptr = out;
106 uint16_t *target_end_ptr = target_ptr + 2;
107 out[0] = out[1] = 0;
108 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
109 &target_ptr, target_end_ptr,
110 strictConversion);
111
112 if (result != conversionOK) {
113 out[0] = out[1] = 0;
114 }
115 }
116
// Returns |value| with its two bytes exchanged (0xAABB becomes 0xBBAA).
static inline uint16_t Swap(uint16_t value) {
  const uint16_t former_high_byte = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low_byte = static_cast<uint16_t>(value & 0xFF);
  return static_cast<uint16_t>((former_low_byte << 8) | former_high_byte);
}
120
UTF16ToUTF8(const vector<uint16_t> & in,bool swap)121 string UTF16ToUTF8(const vector<uint16_t> &in, bool swap) {
122 const UTF16 *source_ptr = &in[0];
123 scoped_array<uint16_t> source_buffer;
124
125 // If we're to swap, we need to make a local copy and swap each byte pair
126 if (swap) {
127 int idx = 0;
128 source_buffer.reset(new uint16_t[in.size()]);
129 UTF16 *source_buffer_ptr = source_buffer.get();
130 for (vector<uint16_t>::const_iterator it = in.begin();
131 it != in.end(); ++it, ++idx)
132 source_buffer_ptr[idx] = Swap(*it);
133
134 source_ptr = source_buffer.get();
135 }
136
137 // The maximum expansion would be 4x the size of the input string.
138 const UTF16 *source_end_ptr = source_ptr + in.size();
139 size_t target_capacity = in.size() * 4;
140 scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
141 UTF8 *target_ptr = target_buffer.get();
142 UTF8 *target_end_ptr = target_ptr + target_capacity;
143 ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
144 &target_ptr, target_end_ptr,
145 strictConversion);
146
147 if (result == conversionOK) {
148 const char *targetPtr = reinterpret_cast<const char *>(target_buffer.get());
149 return targetPtr;
150 }
151
152 return "";
153 }
154
155 } // namespace google_breakpad
156