1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Some of the source code in this file came from fs/cifs/cifs_unicode.c
4 *
5 * Copyright (c) International Business Machines Corp., 2000,2009
6 * Modified by Steve French (sfrench@us.ibm.com)
7 * Modified by Namjae Jeon (linkinjeon@kernel.org)
8 */
9 #include <linux/fs.h>
10 #include <linux/slab.h>
11 #include <asm/unaligned.h>
12 #include "glob.h"
13 #include "unicode.h"
14 #include "uniupr.h"
15 #include "smb_common.h"
16
17 /*
18 * cifs_mapchar() - convert a host-endian char to proper char in codepage
19 * @target: where converted character should be copied
20 * @from: host-endian source string
21 * @cp: codepage to which character should be converted
22 * @mapchar: should character be mapped according to mapchars mount option?
23 *
24 * This function handles the conversion of a single character. It is the
25 * responsibility of the caller to ensure that the target buffer is large
26 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
27 *
28 * Return: string length after conversion
29 */
30 static int
cifs_mapchar(char * target,const __u16 * from,const struct nls_table * cp,bool mapchar)31 cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
32 bool mapchar)
33 {
34 int len = 1;
35 __u16 src_char;
36
37 src_char = *from;
38
39 if (!mapchar)
40 goto cp_convert;
41
42 /*
43 * BB: Cannot handle remapping UNI_SLASH until all the calls to
44 * build_path_from_dentry are modified, as they use slash as
45 * separator.
46 */
47 switch (src_char) {
48 case UNI_COLON:
49 *target = ':';
50 break;
51 case UNI_ASTERISK:
52 *target = '*';
53 break;
54 case UNI_QUESTION:
55 *target = '?';
56 break;
57 case UNI_PIPE:
58 *target = '|';
59 break;
60 case UNI_GRTRTHAN:
61 *target = '>';
62 break;
63 case UNI_LESSTHAN:
64 *target = '<';
65 break;
66 default:
67 goto cp_convert;
68 }
69
70 out:
71 return len;
72
73 cp_convert:
74 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
75 if (len <= 0)
76 goto surrogate_pair;
77
78 goto out;
79
80 surrogate_pair:
81 /* convert SURROGATE_PAIR and IVS */
82 if (strcmp(cp->charset, "utf8"))
83 goto unknown;
84 len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
85 if (len <= 0)
86 goto unknown;
87 return len;
88
89 unknown:
90 *target = '?';
91 len = 1;
92 goto out;
93 }
94
95 /*
96 * smb_utf16_bytes() - compute converted string length
97 * @from: pointer to input string
98 * @maxbytes: input string length
99 * @codepage: destination codepage
100 *
101 * Walk a utf16le string and return the number of bytes that the string will
102 * be after being converted to the given charset, not including any null
103 * termination required. Don't walk past maxbytes in the source buffer.
104 *
105 * Return: string length after conversion
106 */
smb_utf16_bytes(const __le16 * from,int maxbytes,const struct nls_table * codepage)107 static int smb_utf16_bytes(const __le16 *from, int maxbytes,
108 const struct nls_table *codepage)
109 {
110 int i, j;
111 int charlen, outlen = 0;
112 int maxwords = maxbytes / 2;
113 char tmp[NLS_MAX_CHARSET_SIZE];
114 __u16 ftmp[3];
115
116 for (i = 0; i < maxwords; i++) {
117 ftmp[0] = get_unaligned_le16(&from[i]);
118 if (ftmp[0] == 0)
119 break;
120 for (j = 1; j <= 2; j++) {
121 if (i + j < maxwords)
122 ftmp[j] = get_unaligned_le16(&from[i + j]);
123 else
124 ftmp[j] = 0;
125 }
126
127 charlen = cifs_mapchar(tmp, ftmp, codepage, 0);
128 if (charlen > 0)
129 outlen += charlen;
130 else
131 outlen++;
132 }
133
134 return outlen;
135 }
136
137 /*
138 * smb_from_utf16() - convert utf16le string to local charset
139 * @to: destination buffer
140 * @from: source buffer
141 * @tolen: destination buffer size (in bytes)
142 * @fromlen: source buffer size (in bytes)
143 * @codepage: codepage to which characters should be converted
144 * @mapchar: should characters be remapped according to the mapchars option?
145 *
146 * Convert a little-endian utf16le string (as sent by the server) to a string
147 * in the provided codepage. The tolen and fromlen parameters are to ensure
148 * that the code doesn't walk off of the end of the buffer (which is always
149 * a danger if the alignment of the source buffer is off). The destination
150 * string is always properly null terminated and fits in the destination
151 * buffer. Returns the length of the destination string in bytes (including
152 * null terminator).
153 *
154 * Note that some windows versions actually send multiword UTF-16 characters
155 * instead of straight UTF16-2. The linux nls routines however aren't able to
156 * deal with those characters properly. In the event that we get some of
157 * those characters, they won't be translated properly.
158 *
159 * Return: string length after conversion
160 */
smb_from_utf16(char * to,const __le16 * from,int tolen,int fromlen,const struct nls_table * codepage,bool mapchar)161 static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
162 const struct nls_table *codepage, bool mapchar)
163 {
164 int i, j, charlen, safelen;
165 int outlen = 0;
166 int nullsize = nls_nullsize(codepage);
167 int fromwords = fromlen / 2;
168 char tmp[NLS_MAX_CHARSET_SIZE];
169 __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
170
171 /*
172 * because the chars can be of varying widths, we need to take care
173 * not to overflow the destination buffer when we get close to the
174 * end of it. Until we get to this offset, we don't need to check
175 * for overflow however.
176 */
177 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
178
179 for (i = 0; i < fromwords; i++) {
180 ftmp[0] = get_unaligned_le16(&from[i]);
181 if (ftmp[0] == 0)
182 break;
183 for (j = 1; j <= 2; j++) {
184 if (i + j < fromwords)
185 ftmp[j] = get_unaligned_le16(&from[i + j]);
186 else
187 ftmp[j] = 0;
188 }
189
190 /*
191 * check to see if converting this character might make the
192 * conversion bleed into the null terminator
193 */
194 if (outlen >= safelen) {
195 charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
196 if ((outlen + charlen) > (tolen - nullsize))
197 break;
198 }
199
200 /* put converted char into 'to' buffer */
201 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
202 outlen += charlen;
203
204 /*
205 * charlen (=bytes of UTF-8 for 1 character)
206 * 4bytes UTF-8(surrogate pair) is charlen=4
207 * (4bytes UTF-16 code)
208 * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
209 * (2 UTF-8 pairs divided to 2 UTF-16 pairs)
210 */
211 if (charlen == 4)
212 i++;
213 else if (charlen >= 5)
214 /* 5-6bytes UTF-8 */
215 i += 2;
216 }
217
218 /* properly null-terminate string */
219 for (i = 0; i < nullsize; i++)
220 to[outlen++] = 0;
221
222 return outlen;
223 }
224
225 /*
226 * smb_strtoUTF16() - Convert character string to unicode string
227 * @to: destination buffer
228 * @from: source buffer
229 * @len: destination buffer size (in bytes)
230 * @codepage: codepage to which characters should be converted
231 *
232 * Return: string length after conversion
233 */
smb_strtoUTF16(__le16 * to,const char * from,int len,const struct nls_table * codepage)234 int smb_strtoUTF16(__le16 *to, const char *from, int len,
235 const struct nls_table *codepage)
236 {
237 int charlen;
238 int i;
239 wchar_t wchar_to; /* needed to quiet sparse */
240
241 /* special case for utf8 to handle no plane0 chars */
242 if (!strcmp(codepage->charset, "utf8")) {
243 /*
244 * convert utf8 -> utf16, we assume we have enough space
245 * as caller should have assumed conversion does not overflow
246 * in destination len is length in wchar_t units (16bits)
247 */
248 i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
249 (wchar_t *)to, len);
250
251 /* if success terminate and exit */
252 if (i >= 0)
253 goto success;
254 /*
255 * if fails fall back to UCS encoding as this
256 * function should not return negative values
257 * currently can fail only if source contains
258 * invalid encoded characters
259 */
260 }
261
262 for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) {
263 charlen = codepage->char2uni(from, len, &wchar_to);
264 if (charlen < 1) {
265 /* A question mark */
266 wchar_to = 0x003f;
267 charlen = 1;
268 }
269 put_unaligned_le16(wchar_to, &to[i]);
270 }
271
272 success:
273 put_unaligned_le16(0, &to[i]);
274 return i;
275 }
276
277 /*
278 * smb_strndup_from_utf16() - copy a string from wire format to the local
279 * codepage
280 * @src: source string
281 * @maxlen: don't walk past this many bytes in the source string
282 * @is_unicode: is this a unicode string?
283 * @codepage: destination codepage
284 *
285 * Take a string given by the server, convert it to the local codepage and
286 * put it in a new buffer. Returns a pointer to the new string or NULL on
287 * error.
288 *
289 * Return: destination string buffer or error ptr
290 */
smb_strndup_from_utf16(const char * src,const int maxlen,const bool is_unicode,const struct nls_table * codepage)291 char *smb_strndup_from_utf16(const char *src, const int maxlen,
292 const bool is_unicode,
293 const struct nls_table *codepage)
294 {
295 int len, ret;
296 char *dst;
297
298 if (is_unicode) {
299 len = smb_utf16_bytes((__le16 *)src, maxlen, codepage);
300 len += nls_nullsize(codepage);
301 dst = kmalloc(len, GFP_KERNEL);
302 if (!dst)
303 return ERR_PTR(-ENOMEM);
304 ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
305 false);
306 if (ret < 0) {
307 kfree(dst);
308 return ERR_PTR(-EINVAL);
309 }
310 } else {
311 len = strnlen(src, maxlen);
312 len++;
313 dst = kmalloc(len, GFP_KERNEL);
314 if (!dst)
315 return ERR_PTR(-ENOMEM);
316 strscpy(dst, src, len);
317 }
318
319 return dst;
320 }
321
322 /*
323 * Convert 16 bit Unicode pathname to wire format from string in current code
324 * page. Conversion may involve remapping up the six characters that are
325 * only legal in POSIX-like OS (if they are present in the string). Path
326 * names are little endian 16 bit Unicode on the wire
327 */
328 /*
329 * smbConvertToUTF16() - convert string from local charset to utf16
330 * @target: destination buffer
331 * @source: source buffer
332 * @srclen: source buffer size (in bytes)
333 * @cp: codepage to which characters should be converted
334 * @mapchar: should characters be remapped according to the mapchars option?
335 *
336 * Convert 16 bit Unicode pathname to wire format from string in current code
337 * page. Conversion may involve remapping up the six characters that are
338 * only legal in POSIX-like OS (if they are present in the string). Path
339 * names are little endian 16 bit Unicode on the wire
340 *
341 * Return: char length after conversion
342 */
smbConvertToUTF16(__le16 * target,const char * source,int srclen,const struct nls_table * cp,int mapchars)343 int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
344 const struct nls_table *cp, int mapchars)
345 {
346 int i, j, charlen;
347 char src_char;
348 __le16 dst_char;
349 wchar_t tmp;
350 wchar_t wchar_to[6]; /* UTF-16 */
351 int ret;
352 unicode_t u;
353
354 if (!mapchars)
355 return smb_strtoUTF16(target, source, srclen, cp);
356
357 for (i = 0, j = 0; i < srclen; j++) {
358 src_char = source[i];
359 charlen = 1;
360 switch (src_char) {
361 case 0:
362 put_unaligned(0, &target[j]);
363 return j;
364 case ':':
365 dst_char = cpu_to_le16(UNI_COLON);
366 break;
367 case '*':
368 dst_char = cpu_to_le16(UNI_ASTERISK);
369 break;
370 case '?':
371 dst_char = cpu_to_le16(UNI_QUESTION);
372 break;
373 case '<':
374 dst_char = cpu_to_le16(UNI_LESSTHAN);
375 break;
376 case '>':
377 dst_char = cpu_to_le16(UNI_GRTRTHAN);
378 break;
379 case '|':
380 dst_char = cpu_to_le16(UNI_PIPE);
381 break;
382 /*
383 * FIXME: We can not handle remapping backslash (UNI_SLASH)
384 * until all the calls to build_path_from_dentry are modified,
385 * as they use backslash as separator.
386 */
387 default:
388 charlen = cp->char2uni(source + i, srclen - i, &tmp);
389 dst_char = cpu_to_le16(tmp);
390
391 /*
392 * if no match, use question mark, which at least in
393 * some cases serves as wild card
394 */
395 if (charlen > 0)
396 goto ctoUTF16;
397
398 /* convert SURROGATE_PAIR */
399 if (strcmp(cp->charset, "utf8"))
400 goto unknown;
401 if (*(source + i) & 0x80) {
402 charlen = utf8_to_utf32(source + i, 6, &u);
403 if (charlen < 0)
404 goto unknown;
405 } else
406 goto unknown;
407 ret = utf8s_to_utf16s(source + i, charlen,
408 UTF16_LITTLE_ENDIAN,
409 wchar_to, 6);
410 if (ret < 0)
411 goto unknown;
412
413 i += charlen;
414 dst_char = cpu_to_le16(*wchar_to);
415 if (charlen <= 3)
416 /* 1-3bytes UTF-8 to 2bytes UTF-16 */
417 put_unaligned(dst_char, &target[j]);
418 else if (charlen == 4) {
419 /*
420 * 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
421 * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
422 * (charlen=3+4 or 4+4)
423 */
424 put_unaligned(dst_char, &target[j]);
425 dst_char = cpu_to_le16(*(wchar_to + 1));
426 j++;
427 put_unaligned(dst_char, &target[j]);
428 } else if (charlen >= 5) {
429 /* 5-6bytes UTF-8 to 6bytes UTF-16 */
430 put_unaligned(dst_char, &target[j]);
431 dst_char = cpu_to_le16(*(wchar_to + 1));
432 j++;
433 put_unaligned(dst_char, &target[j]);
434 dst_char = cpu_to_le16(*(wchar_to + 2));
435 j++;
436 put_unaligned(dst_char, &target[j]);
437 }
438 continue;
439
440 unknown:
441 dst_char = cpu_to_le16(0x003f);
442 charlen = 1;
443 }
444
445 ctoUTF16:
446 /*
447 * character may take more than one byte in the source string,
448 * but will take exactly two bytes in the target string
449 */
450 i += charlen;
451 put_unaligned(dst_char, &target[j]);
452 }
453
454 return j;
455 }
456