1 // Common/StringConvert.cpp
2
3 #include "StdAfx.h"
4
5 #include "StringConvert.h"
6
7 #ifndef _WIN32
8 #include <stdlib.h>
9 #endif
10
// Substitute character that the overloads in this file without an explicit
// defaultChar parameter pass on as the defaultChar argument.
// The spelling "Defult" is kept as is because the name is referenced elsewhere in this file.
11 static const char k_DefultChar = '_';
12
13 #ifdef _WIN32
14
15 /*
16 MultiByteToWideChar(CodePage, DWORD dwFlags,
17 LPCSTR lpMultiByteStr, int cbMultiByte,
18 LPWSTR lpWideCharStr, int cchWideChar)
19
20 if (cbMultiByte == 0)
21 return: 0. ERR: ERROR_INVALID_PARAMETER
22
23 if (cchWideChar == 0)
24 return: the required buffer size in characters.
25
26 if (supplied buffer size was not large enough)
27 return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
28 The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
29
30 If there are illegal characters:
31 if MB_ERR_INVALID_CHARS is set in dwFlags:
32 - the function stops conversion on illegal character.
33 - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
34
35 if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
36 before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
37 in Vista+: illegal character is not dropped (MSDN). Undocumented: illegal
38 character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
39 */
40
41
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT codePage)42 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
43 {
44 dest.Empty();
45 if (src.IsEmpty())
46 return;
47 {
48 /*
49 wchar_t *d = dest.GetBuf(src.Len());
50 const char *s = (const char *)src;
51 unsigned i;
52
53 for (i = 0;;)
54 {
55 Byte c = (Byte)s[i];
56 if (c >= 0x80 || c == 0)
57 break;
58 d[i++] = (wchar_t)c;
59 }
60
61 if (i != src.Len())
62 {
63 unsigned len = MultiByteToWideChar(codePage, 0, s + i,
64 src.Len() - i, d + i,
65 src.Len() + 1 - i);
66 if (len == 0)
67 throw 282228;
68 i += len;
69 }
70
71 d[i] = 0;
72 dest.ReleaseBuf_SetLen(i);
73 */
74 unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
75 if (len == 0)
76 {
77 if (GetLastError() != 0)
78 throw 282228;
79 }
80 else
81 {
82 len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
83 if (len == 0)
84 throw 282228;
85 dest.ReleaseBuf_SetEnd(len);
86 }
87 }
88 }
89
90 /*
91 int WideCharToMultiByte(
92 UINT CodePage, DWORD dwFlags,
93 LPCWSTR lpWideCharStr, int cchWideChar,
94 LPSTR lpMultiByteStr, int cbMultiByte,
95 LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
96
97 if (lpDefaultChar == NULL),
98 - it uses system default value.
99
100 if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
101 if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
102 return: 0. ERR: ERROR_INVALID_PARAMETER.
103
104 The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
105
106 */
107
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)108 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
109 {
110 dest.Empty();
111 defaultCharWasUsed = false;
112 if (src.IsEmpty())
113 return;
114 {
115 /*
116 unsigned numRequiredBytes = src.Len() * 2;
117 char *d = dest.GetBuf(numRequiredBytes);
118 const wchar_t *s = (const wchar_t *)src;
119 unsigned i;
120
121 for (i = 0;;)
122 {
123 wchar_t c = s[i];
124 if (c >= 0x80 || c == 0)
125 break;
126 d[i++] = (char)c;
127 }
128
129 if (i != src.Len())
130 {
131 BOOL defUsed = FALSE;
132 defaultChar = defaultChar;
133
134 bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
135 unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i,
136 d + i, numRequiredBytes + 1 - i,
137 (isUtf ? NULL : &defaultChar),
138 (isUtf ? NULL : &defUsed));
139 defaultCharWasUsed = (defUsed != FALSE);
140 if (len == 0)
141 throw 282229;
142 i += len;
143 }
144
145 d[i] = 0;
146 dest.ReleaseBuf_SetLen(i);
147 */
148
149 /*
150 if (codePage != CP_UTF7)
151 {
152 const wchar_t *s = (const wchar_t *)src;
153 unsigned i;
154 for (i = 0;; i++)
155 {
156 wchar_t c = s[i];
157 if (c >= 0x80 || c == 0)
158 break;
159 }
160
161 if (s[i] == 0)
162 {
163 char *d = dest.GetBuf(src.Len());
164 for (i = 0;;)
165 {
166 wchar_t c = s[i];
167 if (c == 0)
168 break;
169 d[i++] = (char)c;
170 }
171 d[i] = 0;
172 dest.ReleaseBuf_SetLen(i);
173 return;
174 }
175 }
176 */
177
178 unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
179 if (len == 0)
180 {
181 if (GetLastError() != 0)
182 throw 282228;
183 }
184 else
185 {
186 BOOL defUsed = FALSE;
187 bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
188 // defaultChar = defaultChar;
189 len = WideCharToMultiByte(codePage, 0, src, src.Len(),
190 dest.GetBuf(len), len,
191 (isUtf ? NULL : &defaultChar),
192 (isUtf ? NULL : &defUsed)
193 );
194 if (!isUtf)
195 defaultCharWasUsed = (defUsed != FALSE);
196 if (len == 0)
197 throw 282228;
198 dest.ReleaseBuf_SetEnd(len);
199 }
200 }
201 }
202
203 /*
204 #ifndef UNDER_CE
205 AString SystemStringToOemString(const CSysString &src)
206 {
207 AString dest;
208 const unsigned len = src.Len() * 2;
209 CharToOem(src, dest.GetBuf(len));
210 dest.ReleaseBuf_CalcLen(len);
211 return dest;
212 }
213 #endif
214 */
215
216 #else
217
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT)218 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
219 {
220 dest.Empty();
221 if (src.IsEmpty())
222 return;
223
224 size_t limit = ((size_t)src.Len() + 1) * 2;
225 wchar_t *d = dest.GetBuf((unsigned)limit);
226 size_t len = mbstowcs(d, src, limit);
227 if (len != (size_t)-1)
228 {
229 dest.ReleaseBuf_SetEnd((unsigned)len);
230 return;
231 }
232
233 {
234 unsigned i;
235 const char *s = (const char *)src;
236 for (i = 0;;)
237 {
238 Byte c = (Byte)s[i];
239 if (c == 0)
240 break;
241 d[i++] = (wchar_t)c;
242 }
243 d[i] = 0;
244 dest.ReleaseBuf_SetLen(i);
245 }
246 }
247
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT,char defaultChar,bool & defaultCharWasUsed)248 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
249 {
250 dest.Empty();
251 defaultCharWasUsed = false;
252 if (src.IsEmpty())
253 return;
254
255 size_t limit = ((size_t)src.Len() + 1) * 6;
256 char *d = dest.GetBuf((unsigned)limit);
257 size_t len = wcstombs(d, src, limit);
258 if (len != (size_t)-1)
259 {
260 dest.ReleaseBuf_SetEnd((unsigned)len);
261 return;
262 }
263
264 {
265 const wchar_t *s = (const wchar_t *)src;
266 unsigned i;
267 for (i = 0;;)
268 {
269 wchar_t c = s[i];
270 if (c == 0)
271 break;
272 if (c >= 0x100)
273 {
274 c = defaultChar;
275 defaultCharWasUsed = true;
276 }
277 d[i++] = (char)c;
278 }
279 d[i] = 0;
280 dest.ReleaseBuf_SetLen(i);
281 }
282 }
283
284 #endif
285
286
MultiByteToUnicodeString(const AString & src,UINT codePage)287 UString MultiByteToUnicodeString(const AString &src, UINT codePage)
288 {
289 UString dest;
290 MultiByteToUnicodeString2(dest, src, codePage);
291 return dest;
292 }
293
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage)294 void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
295 {
296 bool defaultCharWasUsed;
297 UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
298 }
299
UnicodeStringToMultiByte(const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)300 AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
301 {
302 AString dest;
303 UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed);
304 return dest;
305 }
306
UnicodeStringToMultiByte(const UString & src,UINT codePage)307 AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
308 {
309 AString dest;
310 bool defaultCharWasUsed;
311 UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
312 return dest;
313 }
314