• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Common/StringConvert.cpp
2 
3 #include "StdAfx.h"
4 
5 #include "StringConvert.h"
6 
7 #ifndef _WIN32
8 #include <stdlib.h>
9 #endif
10 
// Substitute character passed as `defaultChar` by the conversion wrappers in
// this file that do not take an explicit default character from the caller.
11 static const char k_DefultChar = '_';
12 
13 #ifdef _WIN32
14 
15 /*
16 MultiByteToWideChar(CodePage, DWORD dwFlags,
17     LPCSTR lpMultiByteStr, int cbMultiByte,
18     LPWSTR lpWideCharStr, int cchWideChar)
19 
20   if (cbMultiByte == 0)
21     return: 0. ERR: ERROR_INVALID_PARAMETER
22 
23   if (cchWideChar == 0)
24     return: the required buffer size in characters.
25 
26   if (supplied buffer size was not large enough)
27     return: 0. ERR: ERROR_INSUFFICIENT_BUFFER
28     The number of filled characters in lpWideCharStr can be smaller than cchWideChar (if last character is complex)
29 
30   If there are illegal characters:
31     if MB_ERR_INVALID_CHARS is set in dwFlags:
32       - the function stops conversion on illegal character.
33       - Return: 0. ERR: ERROR_NO_UNICODE_TRANSLATION.
34 
35     if MB_ERR_INVALID_CHARS is NOT set in dwFlags:
36       before Vista: illegal character is dropped (skipped). WinXP-64: GetLastError() returns 0.
37       in Vista+:    illegal character is not dropped (MSDN). Undocumented: illegal
38                     character is converted to U+FFFD, which is REPLACEMENT CHARACTER.
39 */
40 
41 
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT codePage)42 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT codePage)
43 {
44   dest.Empty();
45   if (src.IsEmpty())
46     return;
47   {
48     /*
49     wchar_t *d = dest.GetBuf(src.Len());
50     const char *s = (const char *)src;
51     unsigned i;
52 
53     for (i = 0;;)
54     {
55       Byte c = (Byte)s[i];
56       if (c >= 0x80 || c == 0)
57         break;
58       d[i++] = (wchar_t)c;
59     }
60 
61     if (i != src.Len())
62     {
63       unsigned len = MultiByteToWideChar(codePage, 0, s + i,
64           src.Len() - i, d + i,
65           src.Len() + 1 - i);
66       if (len == 0)
67         throw 282228;
68       i += len;
69     }
70 
71     d[i] = 0;
72     dest.ReleaseBuf_SetLen(i);
73     */
74     unsigned len = MultiByteToWideChar(codePage, 0, src, src.Len(), NULL, 0);
75     if (len == 0)
76     {
77       if (GetLastError() != 0)
78         throw 282228;
79     }
80     else
81     {
82       len = MultiByteToWideChar(codePage, 0, src, src.Len(), dest.GetBuf(len), len);
83       if (len == 0)
84         throw 282228;
85       dest.ReleaseBuf_SetEnd(len);
86     }
87   }
88 }
89 
90 /*
91   int WideCharToMultiByte(
92       UINT CodePage, DWORD dwFlags,
93       LPCWSTR lpWideCharStr, int cchWideChar,
94       LPSTR lpMultiByteStr, int cbMultiByte,
95       LPCSTR lpDefaultChar, LPBOOL lpUsedDefaultChar);
96 
97 if (lpDefaultChar == NULL),
98   - it uses system default value.
99 
100 if (CodePage == CP_UTF7 || CodePage == CP_UTF8)
101   if (lpDefaultChar != NULL || lpUsedDefaultChar != NULL)
102     return: 0. ERR: ERROR_INVALID_PARAMETER.
103 
104 The function operates most efficiently, if (lpDefaultChar == NULL && lpUsedDefaultChar == NULL)
105 
106 */
107 
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)108 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
109 {
110   dest.Empty();
111   defaultCharWasUsed = false;
112   if (src.IsEmpty())
113     return;
114   {
115     /*
116     unsigned numRequiredBytes = src.Len() * 2;
117     char *d = dest.GetBuf(numRequiredBytes);
118     const wchar_t *s = (const wchar_t *)src;
119     unsigned i;
120 
121     for (i = 0;;)
122     {
123       wchar_t c = s[i];
124       if (c >= 0x80 || c == 0)
125         break;
126       d[i++] = (char)c;
127     }
128 
129     if (i != src.Len())
130     {
131       BOOL defUsed = FALSE;
132       defaultChar = defaultChar;
133 
134       bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
135       unsigned len = WideCharToMultiByte(codePage, 0, s + i, src.Len() - i,
136           d + i, numRequiredBytes + 1 - i,
137           (isUtf ? NULL : &defaultChar),
138           (isUtf ? NULL : &defUsed));
139       defaultCharWasUsed = (defUsed != FALSE);
140       if (len == 0)
141         throw 282229;
142       i += len;
143     }
144 
145     d[i] = 0;
146     dest.ReleaseBuf_SetLen(i);
147     */
148 
149     /*
150     if (codePage != CP_UTF7)
151     {
152       const wchar_t *s = (const wchar_t *)src;
153       unsigned i;
154       for (i = 0;; i++)
155       {
156         wchar_t c = s[i];
157         if (c >= 0x80 || c == 0)
158           break;
159       }
160 
161       if (s[i] == 0)
162       {
163         char *d = dest.GetBuf(src.Len());
164         for (i = 0;;)
165         {
166           wchar_t c = s[i];
167           if (c == 0)
168             break;
169           d[i++] = (char)c;
170         }
171         d[i] = 0;
172         dest.ReleaseBuf_SetLen(i);
173         return;
174       }
175     }
176     */
177 
178     unsigned len = WideCharToMultiByte(codePage, 0, src, src.Len(), NULL, 0, NULL, NULL);
179     if (len == 0)
180     {
181       if (GetLastError() != 0)
182         throw 282228;
183     }
184     else
185     {
186       BOOL defUsed = FALSE;
187       bool isUtf = (codePage == CP_UTF8 || codePage == CP_UTF7);
188       // defaultChar = defaultChar;
189       len = WideCharToMultiByte(codePage, 0, src, src.Len(),
190           dest.GetBuf(len), len,
191           (isUtf ? NULL : &defaultChar),
192           (isUtf ? NULL : &defUsed)
193           );
194       if (!isUtf)
195         defaultCharWasUsed = (defUsed != FALSE);
196       if (len == 0)
197         throw 282228;
198       dest.ReleaseBuf_SetEnd(len);
199     }
200   }
201 }
202 
203 /*
204 #ifndef UNDER_CE
205 AString SystemStringToOemString(const CSysString &src)
206 {
207   AString dest;
208   const unsigned len = src.Len() * 2;
209   CharToOem(src, dest.GetBuf(len));
210   dest.ReleaseBuf_CalcLen(len);
211   return dest;
212 }
213 #endif
214 */
215 
216 #else
217 
MultiByteToUnicodeString2(UString & dest,const AString & src,UINT)218 void MultiByteToUnicodeString2(UString &dest, const AString &src, UINT /* codePage */)
219 {
220   dest.Empty();
221   if (src.IsEmpty())
222     return;
223 
224   size_t limit = ((size_t)src.Len() + 1) * 2;
225   wchar_t *d = dest.GetBuf((unsigned)limit);
226   size_t len = mbstowcs(d, src, limit);
227   if (len != (size_t)-1)
228   {
229     dest.ReleaseBuf_SetEnd((unsigned)len);
230     return;
231   }
232 
233   {
234     unsigned i;
235     const char *s = (const char *)src;
236     for (i = 0;;)
237     {
238       Byte c = (Byte)s[i];
239       if (c == 0)
240         break;
241       d[i++] = (wchar_t)c;
242     }
243     d[i] = 0;
244     dest.ReleaseBuf_SetLen(i);
245   }
246 }
247 
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT,char defaultChar,bool & defaultCharWasUsed)248 static void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT /* codePage */, char defaultChar, bool &defaultCharWasUsed)
249 {
250   dest.Empty();
251   defaultCharWasUsed = false;
252   if (src.IsEmpty())
253     return;
254 
255   size_t limit = ((size_t)src.Len() + 1) * 6;
256   char *d = dest.GetBuf((unsigned)limit);
257   size_t len = wcstombs(d, src, limit);
258   if (len != (size_t)-1)
259   {
260     dest.ReleaseBuf_SetEnd((unsigned)len);
261     return;
262   }
263 
264   {
265     const wchar_t *s = (const wchar_t *)src;
266     unsigned i;
267     for (i = 0;;)
268     {
269       wchar_t c = s[i];
270       if (c == 0)
271         break;
272       if (c >= 0x100)
273       {
274         c = defaultChar;
275         defaultCharWasUsed = true;
276       }
277       d[i++] = (char)c;
278     }
279     d[i] = 0;
280     dest.ReleaseBuf_SetLen(i);
281   }
282 }
283 
284 #endif
285 
286 
MultiByteToUnicodeString(const AString & src,UINT codePage)287 UString MultiByteToUnicodeString(const AString &src, UINT codePage)
288 {
289   UString dest;
290   MultiByteToUnicodeString2(dest, src, codePage);
291   return dest;
292 }
293 
MultiByteToUnicodeString(const char * src,UINT codePage)294 UString MultiByteToUnicodeString(const char *src, UINT codePage)
295 {
296   return MultiByteToUnicodeString(AString(src), codePage);
297 }
298 
299 
UnicodeStringToMultiByte2(AString & dest,const UString & src,UINT codePage)300 void UnicodeStringToMultiByte2(AString &dest, const UString &src, UINT codePage)
301 {
302   bool defaultCharWasUsed;
303   UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
304 }
305 
UnicodeStringToMultiByte(const UString & src,UINT codePage,char defaultChar,bool & defaultCharWasUsed)306 AString UnicodeStringToMultiByte(const UString &src, UINT codePage, char defaultChar, bool &defaultCharWasUsed)
307 {
308   AString dest;
309   UnicodeStringToMultiByte2(dest, src, codePage, defaultChar, defaultCharWasUsed);
310   return dest;
311 }
312 
UnicodeStringToMultiByte(const UString & src,UINT codePage)313 AString UnicodeStringToMultiByte(const UString &src, UINT codePage)
314 {
315   AString dest;
316   bool defaultCharWasUsed;
317   UnicodeStringToMultiByte2(dest, src, codePage, k_DefultChar, defaultCharWasUsed);
318   return dest;
319 }
320