• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * unistr.c - Unicode string handling. Originated from the Linux-NTFS project.
3  *
4  * Copyright (c) 2000-2004 Anton Altaparmakov
5  * Copyright (c) 2002-2009 Szabolcs Szakacsits
6  * Copyright (c) 2008-2015 Jean-Pierre Andre
7  * Copyright (c) 2008      Bernhard Kaindl
8  *
9  * This program/include file is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as published
11  * by the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program/include file is distributed in the hope that it will be
15  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program (in the main directory of the NTFS-3G
21  * distribution in the file COPYING); if not, write to the Free Software
22  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23  */
24 
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28 
29 #ifdef HAVE_STDIO_H
30 #include <stdio.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef HAVE_WCHAR_H
36 #include <wchar.h>
37 #endif
38 #ifdef HAVE_STRING_H
39 #include <string.h>
40 #endif
41 #ifdef HAVE_ERRNO_H
42 #include <errno.h>
43 #endif
44 #ifdef HAVE_LOCALE_H
45 #include <locale.h>
46 #endif
47 
48 #if defined(__APPLE__) || defined(__DARWIN__)
49 #ifdef ENABLE_NFCONV
50 #include <CoreFoundation/CoreFoundation.h>
51 #endif /* ENABLE_NFCONV */
52 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
53 
54 #include "compat.h"
55 #include "attrib.h"
56 #include "types.h"
57 #include "unistr.h"
58 #include "debug.h"
59 #include "logging.h"
60 #include "misc.h"
61 
62 #ifndef ALLOW_BROKEN_UNICODE
63 /* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default,
64  * open to debate. */
65 #define ALLOW_BROKEN_UNICODE 1
66 #endif /* !defined(ALLOW_BROKEN_UNICODE) */
67 
68 /*
69  * IMPORTANT
70  * =========
71  *
72  * All these routines assume that the Unicode characters are in little endian
73  * encoding inside the strings!!!
74  */
75 
76 static int use_utf8 = 1; /* use UTF-8 encoding for file names */
77 
78 #if defined(__APPLE__) || defined(__DARWIN__)
79 #ifdef ENABLE_NFCONV
80 /**
81  * This variable controls whether or not automatic normalization form conversion
82  * should be performed when translating NTFS unicode file names to UTF-8.
83  * Defaults to on, but can be controlled from the outside using the function
84  *   int ntfs_macosx_normalize_filenames(int normalize);
85  */
86 static int nfconvert_utf8 = 1;
87 #endif /* ENABLE_NFCONV */
88 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
89 
90 /*
91  * This is used by the name collation functions to quickly determine what
92  * characters are (in)valid.
93  */
94 #if 0
95 static const u8 legal_ansi_char_array[0x40] = {
96 	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
97 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
98 
99 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
100 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
101 
102 	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
103 	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
104 
105 	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
106 	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
107 };
108 #endif
109 
110 /**
111  * ntfs_names_are_equal - compare two Unicode names for equality
112  * @s1:			name to compare to @s2
113  * @s1_len:		length in Unicode characters of @s1
114  * @s2:			name to compare to @s1
115  * @s2_len:		length in Unicode characters of @s2
116  * @ic:			ignore case bool
117  * @upcase:		upcase table (only if @ic == IGNORE_CASE)
118  * @upcase_size:	length in Unicode characters of @upcase (if present)
119  *
120  * Compare the names @s1 and @s2 and return TRUE (1) if the names are
121  * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
122  * the @upcase table is used to perform a case insensitive comparison.
123  */
ntfs_names_are_equal(const ntfschar * s1,size_t s1_len,const ntfschar * s2,size_t s2_len,const IGNORE_CASE_BOOL ic,const ntfschar * upcase,const u32 upcase_size)124 BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
125 		const ntfschar *s2, size_t s2_len,
126 		const IGNORE_CASE_BOOL ic,
127 		const ntfschar *upcase, const u32 upcase_size)
128 {
129 	if (s1_len != s2_len)
130 		return FALSE;
131 	if (!s1_len)
132 		return TRUE;
133 	if (ic == CASE_SENSITIVE)
134 		return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE;
135 	return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE:
136 								       TRUE;
137 }
138 
139 /*
140  * ntfs_names_full_collate() fully collate two Unicode names
141  *
142  * @name1:	first Unicode name to compare
143  * @name1_len:	length of first Unicode name to compare
144  * @name2:	second Unicode name to compare
145  * @name2_len:	length of second Unicode name to compare
146  * @ic:		either CASE_SENSITIVE or IGNORE_CASE (see below)
147  * @upcase:	upcase table
148  * @upcase_len:	upcase table size
149  *
150  * If @ic is CASE_SENSITIVE, then the names are compared primarily ignoring
151  * case, but if the names are equal ignoring case, then they are compared
152  * case-sensitively.  As an example, "abc" would collate before "BCD" (since
153  * "abc" and "BCD" differ ignoring case and 'A' < 'B') but after "ABC" (since
154  * "ABC" and "abc" are equal ignoring case and 'A' < 'a').  This matches the
155  * collation order of filenames as indexed in NTFS directories.
156  *
157  * If @ic is IGNORE_CASE, then the names are only compared case-insensitively
158  * and are considered to match if and only if they are equal ignoring case.
159  *
160  * Returns:
161  *  -1 if the first name collates before the second one,
162  *   0 if the names match, or
163  *   1 if the second name collates before the first one
164  */
ntfs_names_full_collate(const ntfschar * name1,const u32 name1_len,const ntfschar * name2,const u32 name2_len,const IGNORE_CASE_BOOL ic,const ntfschar * upcase,const u32 upcase_len)165 int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
166 		const ntfschar *name2, const u32 name2_len,
167 		const IGNORE_CASE_BOOL ic, const ntfschar *upcase,
168 		const u32 upcase_len)
169 {
170 	u32 cnt;
171 	u16 c1, c2;
172 	u16 u1, u2;
173 
174 #ifdef DEBUG
175 	if (!name1 || !name2 || !upcase || !upcase_len) {
176 		ntfs_log_debug("ntfs_names_collate received NULL pointer!\n");
177 		exit(1);
178 	}
179 #endif
180 	cnt = min(name1_len, name2_len);
181 	if (cnt > 0) {
182 		if (ic == CASE_SENSITIVE) {
183 			while (--cnt && (*name1 == *name2)) {
184 				name1++;
185 				name2++;
186 			}
187 			u1 = c1 = le16_to_cpu(*name1);
188 			u2 = c2 = le16_to_cpu(*name2);
189 			if (u1 < upcase_len)
190 				u1 = le16_to_cpu(upcase[u1]);
191 			if (u2 < upcase_len)
192 				u2 = le16_to_cpu(upcase[u2]);
193 			if ((u1 == u2) && cnt)
194 				do {
195 					name1++;
196 					u1 = le16_to_cpu(*name1);
197 					name2++;
198 					u2 = le16_to_cpu(*name2);
199 					if (u1 < upcase_len)
200 						u1 = le16_to_cpu(upcase[u1]);
201 					if (u2 < upcase_len)
202 						u2 = le16_to_cpu(upcase[u2]);
203 				} while ((u1 == u2) && --cnt);
204 			if (u1 < u2)
205 				return -1;
206 			if (u1 > u2)
207 				return 1;
208 			if (name1_len < name2_len)
209 				return -1;
210 			if (name1_len > name2_len)
211 				return 1;
212 			if (c1 < c2)
213 				return -1;
214 			if (c1 > c2)
215 				return 1;
216 		} else {
217 			do {
218 				u1 = le16_to_cpu(*name1);
219 				name1++;
220 				u2 = le16_to_cpu(*name2);
221 				name2++;
222 				if (u1 < upcase_len)
223 					u1 = le16_to_cpu(upcase[u1]);
224 				if (u2 < upcase_len)
225 					u2 = le16_to_cpu(upcase[u2]);
226 			} while ((u1 == u2) && --cnt);
227 			if (u1 < u2)
228 				return -1;
229 			if (u1 > u2)
230 				return 1;
231 			if (name1_len < name2_len)
232 				return -1;
233 			if (name1_len > name2_len)
234 				return 1;
235 		}
236 	} else {
237 		if (name1_len < name2_len)
238 			return -1;
239 		if (name1_len > name2_len)
240 			return 1;
241 	}
242 	return 0;
243 }
244 
245 /**
246  * ntfs_ucsncmp - compare two little endian Unicode strings
247  * @s1:		first string
248  * @s2:		second string
249  * @n:		maximum unicode characters to compare
250  *
251  * Compare the first @n characters of the Unicode strings @s1 and @s2,
252  * The strings in little endian format and appropriate le16_to_cpu()
253  * conversion is performed on non-little endian machines.
254  *
255  * The function returns an integer less than, equal to, or greater than zero
256  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
257  * to be less than, to match, or be greater than @s2.
258  */
ntfs_ucsncmp(const ntfschar * s1,const ntfschar * s2,size_t n)259 int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
260 {
261 	u16 c1, c2;
262 	size_t i;
263 
264 #ifdef DEBUG
265 	if (!s1 || !s2) {
266 		ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n");
267 		exit(1);
268 	}
269 #endif
270 	for (i = 0; i < n; ++i) {
271 		c1 = le16_to_cpu(s1[i]);
272 		c2 = le16_to_cpu(s2[i]);
273 		if (c1 < c2)
274 			return -1;
275 		if (c1 > c2)
276 			return 1;
277 		if (!c1)
278 			break;
279 	}
280 	return 0;
281 }
282 
283 /**
284  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
285  * @s1:			first string
286  * @s2:			second string
287  * @n:			maximum unicode characters to compare
288  * @upcase:		upcase table
289  * @upcase_size:	upcase table size in Unicode characters
290  *
291  * Compare the first @n characters of the Unicode strings @s1 and @s2,
292  * ignoring case. The strings in little endian format and appropriate
293  * le16_to_cpu() conversion is performed on non-little endian machines.
294  *
295  * Each character is uppercased using the @upcase table before the comparison.
296  *
297  * The function returns an integer less than, equal to, or greater than zero
298  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
299  * to be less than, to match, or be greater than @s2.
300  */
ntfs_ucsncasecmp(const ntfschar * s1,const ntfschar * s2,size_t n,const ntfschar * upcase,const u32 upcase_size)301 int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
302 		const ntfschar *upcase, const u32 upcase_size)
303 {
304 	u16 c1, c2;
305 	size_t i;
306 
307 #ifdef DEBUG
308 	if (!s1 || !s2 || !upcase) {
309 		ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n");
310 		exit(1);
311 	}
312 #endif
313 	for (i = 0; i < n; ++i) {
314 		if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
315 			c1 = le16_to_cpu(upcase[c1]);
316 		if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
317 			c2 = le16_to_cpu(upcase[c2]);
318 		if (c1 < c2)
319 			return -1;
320 		if (c1 > c2)
321 			return 1;
322 		if (!c1)
323 			break;
324 	}
325 	return 0;
326 }
327 
328 /**
329  * ntfs_ucsnlen - determine the length of a little endian Unicode string
330  * @s:		pointer to Unicode string
331  * @maxlen:	maximum length of string @s
332  *
333  * Return the number of Unicode characters in the little endian Unicode
334  * string @s up to a maximum of maxlen Unicode characters, not including
335  * the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s
336  * and @s + @maxlen, @maxlen is returned.
337  *
338  * This function never looks beyond @s + @maxlen.
339  */
ntfs_ucsnlen(const ntfschar * s,u32 maxlen)340 u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)
341 {
342 	u32 i;
343 
344 	for (i = 0; i < maxlen; i++) {
345 		if (!le16_to_cpu(s[i]))
346 			break;
347 	}
348 	return i;
349 }
350 
351 /**
352  * ntfs_ucsndup - duplicate little endian Unicode string
353  * @s:		pointer to Unicode string
354  * @maxlen:	maximum length of string @s
355  *
356  * Return a pointer to a new little endian Unicode string which is a duplicate
357  * of the string s.  Memory for the new string is obtained with ntfs_malloc(3),
358  * and can be freed with free(3).
359  *
360  * A maximum of @maxlen Unicode characters are copied and a terminating
361  * (ntfschar)'\0' little endian Unicode character is added.
362  *
363  * This function never looks beyond @s + @maxlen.
364  *
365  * Return a pointer to the new little endian Unicode string on success and NULL
366  * on failure with errno set to the error code.
367  */
ntfs_ucsndup(const ntfschar * s,u32 maxlen)368 ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)
369 {
370 	ntfschar *dst;
371 	u32 len;
372 
373 	len = ntfs_ucsnlen(s, maxlen);
374 	dst = ntfs_malloc((len + 1) * sizeof(ntfschar));
375 	if (dst) {
376 		memcpy(dst, s, len * sizeof(ntfschar));
377 		dst[len] = const_cpu_to_le16(L'\0');
378 	}
379 	return dst;
380 }
381 
382 /**
383  * ntfs_name_upcase - Map an Unicode name to its uppercase equivalent
384  * @name:
385  * @name_len:
386  * @upcase:
387  * @upcase_len:
388  *
389  * Description...
390  *
391  * Returns:
392  */
ntfs_name_upcase(ntfschar * name,u32 name_len,const ntfschar * upcase,const u32 upcase_len)393 void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,
394 		const u32 upcase_len)
395 {
396 	u32 i;
397 	u16 u;
398 
399 	for (i = 0; i < name_len; i++)
400 		if ((u = le16_to_cpu(name[i])) < upcase_len)
401 			name[i] = upcase[u];
402 }
403 
404 /**
405  * ntfs_name_locase - Map a Unicode name to its lowercase equivalent
406  */
ntfs_name_locase(ntfschar * name,u32 name_len,const ntfschar * locase,const u32 locase_len)407 void ntfs_name_locase(ntfschar *name, u32 name_len, const ntfschar *locase,
408 		const u32 locase_len)
409 {
410 	u32 i;
411 	u16 u;
412 
413 	if (locase)
414 		for (i = 0; i < name_len; i++)
415 			if ((u = le16_to_cpu(name[i])) < locase_len)
416 				name[i] = locase[u];
417 }
418 
419 /**
420  * ntfs_file_value_upcase - Convert a filename to upper case
421  * @file_name_attr:
422  * @upcase:
423  * @upcase_len:
424  *
425  * Description...
426  *
427  * Returns:
428  */
ntfs_file_value_upcase(FILE_NAME_ATTR * file_name_attr,const ntfschar * upcase,const u32 upcase_len)429 void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
430 		const ntfschar *upcase, const u32 upcase_len)
431 {
432 	ntfs_name_upcase((ntfschar*)&file_name_attr->file_name,
433 			file_name_attr->file_name_length, upcase, upcase_len);
434 }
435 
/*
   NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough
   for now]) for path names, but the Unicode code points need to be
   converted before a path can be accessed under NTFS. For 7-bit ASCII/ANSI,
   glibc does this even without a locale, in a hard-coded fashion, which
   is easy because the low 7-bit ASCII range appears to be available in
   all charsets. However, it does not convert anything else if there was
   an error in the locale setup, or if no locale was set up at all, as
   happens when mount is called during early boot, where (by policy) we
   do not use locales (and they may not be available if /usr is not yet
   mounted). This patch fixes the resulting issues for systems which use
   UTF-8; for the others, specifying the locale in fstab brings them
   the encoding which they want.

   If no locale is defined or there was a problem with setting one
   up, and whenever nl_langinfo(CODESET) returns a string starting with
   "ANSI", use an internal UCS-2LE <-> UTF-8 codeset converter to fix
   the bug where NTFS-3G does not show any path names which include
   international characters (and, as a result, also fails to create them).

   Author: Bernhard Kaindl <bk@suse.de>
   Jean-Pierre Andre made it compliant with RFC3629/RFC2781.
*/
459 
460 /*
461  * Return the number of bytes in UTF-8 needed (without the terminating null) to
462  * store the given UTF-16LE string.
463  *
464  * On error, -1 is returned, and errno is set to the error code. The following
465  * error codes can be expected:
466  *	EILSEQ		The input string is not valid UTF-16LE (only possible
467  *			if compiled without ALLOW_BROKEN_UNICODE).
468  *	ENAMETOOLONG	The length of the UTF-8 string in bytes (without the
469  *			terminating null) would exceed @outs_len.
470  */
utf16_to_utf8_size(const ntfschar * ins,const int ins_len,int outs_len)471 static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len)
472 {
473 	int i, ret = -1;
474 	int count = 0;
475 	BOOL surrog;
476 
477 	surrog = FALSE;
478 	for (i = 0; i < ins_len && ins[i] && count <= outs_len; i++) {
479 		unsigned short c = le16_to_cpu(ins[i]);
480 		if (surrog) {
481 			if ((c >= 0xdc00) && (c < 0xe000)) {
482 				surrog = FALSE;
483 				count += 4;
484 			} else {
485 #if ALLOW_BROKEN_UNICODE
486 				/* The first UTF-16 unit of a surrogate pair has
487 				 * a value between 0xd800 and 0xdc00. It can be
488 				 * encoded as an individual UTF-8 sequence if we
489 				 * cannot combine it with the next UTF-16 unit
490 				 * unit as a surrogate pair. */
491 				surrog = FALSE;
492 				count += 3;
493 
494 				--i;
495 				continue;
496 #else
497 				goto fail;
498 #endif /* ALLOW_BROKEN_UNICODE */
499 			}
500 		} else
501 			if (c < 0x80)
502 				count++;
503 			else if (c < 0x800)
504 				count += 2;
505 			else if (c < 0xd800)
506 				count += 3;
507 			else if (c < 0xdc00)
508 				surrog = TRUE;
509 #if ALLOW_BROKEN_UNICODE
510 			else if (c < 0xe000)
511 				count += 3;
512 			else if (c >= 0xe000)
513 #else
514 			else if ((c >= 0xe000) && (c < 0xfffe))
515 #endif /* ALLOW_BROKEN_UNICODE */
516 				count += 3;
517 			else
518 				goto fail;
519 	}
520 
521 	if (surrog && count <= outs_len) {
522 #if ALLOW_BROKEN_UNICODE
523 		count += 3; /* ending with a single surrogate */
524 #else
525 		goto fail;
526 #endif /* ALLOW_BROKEN_UNICODE */
527 	}
528 
529 	if (count > outs_len) {
530 		errno = ENAMETOOLONG;
531 		goto out;
532 	}
533 
534 	ret = count;
535 out:
536 	return ret;
537 fail:
538 	errno = EILSEQ;
539 	goto out;
540 }
541 
542 /*
543  * ntfs_utf16_to_utf8 - convert a little endian UTF16LE string to an UTF-8 string
544  * @ins:	input utf16 string buffer
545  * @ins_len:	length of input string in utf16 characters
546  * @outs:	on return contains the (allocated) output multibyte string
547  * @outs_len:	length of output buffer in bytes (ignored if *@outs is NULL)
548  *
549  * Return -1 with errno set if string has invalid byte sequence or too long.
550  */
ntfs_utf16_to_utf8(const ntfschar * ins,const int ins_len,char ** outs,int outs_len)551 static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
552 			      char **outs, int outs_len)
553 {
554 #if defined(__APPLE__) || defined(__DARWIN__)
555 #ifdef ENABLE_NFCONV
556 	char *original_outs_value = *outs;
557 	int original_outs_len = outs_len;
558 #endif /* ENABLE_NFCONV */
559 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
560 
561 	char *t;
562 	int i, size, ret = -1;
563 	int halfpair;
564 
565 	halfpair = 0;
566 	if (!*outs) {
567 		/* If no output buffer was provided, we will allocate one and
568 		 * limit its length to PATH_MAX.  Note: we follow the standard
569 		 * convention of PATH_MAX including the terminating null. */
570 		outs_len = PATH_MAX;
571 	}
572 
573 	/* The size *with* the terminating null is limited to @outs_len,
574 	 * so the size *without* the terminating null is limited to one less. */
575 	size = utf16_to_utf8_size(ins, ins_len, outs_len - 1);
576 
577 	if (size < 0)
578 		goto out;
579 
580 	if (!*outs) {
581 		outs_len = size + 1;
582 		*outs = ntfs_malloc(outs_len);
583 		if (!*outs)
584 			goto out;
585 	}
586 
587 	t = *outs;
588 
589 	for (i = 0; i < ins_len && ins[i]; i++) {
590 	    unsigned short c = le16_to_cpu(ins[i]);
591 			/* size not double-checked */
592 		if (halfpair) {
593 			if ((c >= 0xdc00) && (c < 0xe000)) {
594 				*t++ = 0xf0 + (((halfpair + 64) >> 8) & 7);
595 				*t++ = 0x80 + (((halfpair + 64) >> 2) & 63);
596 				*t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
597 				*t++ = 0x80 + (c & 63);
598 				halfpair = 0;
599 			} else {
600 #if ALLOW_BROKEN_UNICODE
601 				/* The first UTF-16 unit of a surrogate pair has
602 				 * a value between 0xd800 and 0xdc00. It can be
603 				 * encoded as an individual UTF-8 sequence if we
604 				 * cannot combine it with the next UTF-16 unit
605 				 * unit as a surrogate pair. */
606 				*t++ = 0xe0 | (halfpair >> 12);
607 				*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
608 				*t++ = 0x80 | (halfpair & 0x3f);
609 				halfpair = 0;
610 
611 				--i;
612 				continue;
613 #else
614 				goto fail;
615 #endif /* ALLOW_BROKEN_UNICODE */
616 			}
617 		} else if (c < 0x80) {
618 			*t++ = c;
619 	    	} else {
620 			if (c < 0x800) {
621 			   	*t++ = (0xc0 | ((c >> 6) & 0x3f));
622 			        *t++ = 0x80 | (c & 0x3f);
623 			} else if (c < 0xd800) {
624 			   	*t++ = 0xe0 | (c >> 12);
625 			   	*t++ = 0x80 | ((c >> 6) & 0x3f);
626 		        	*t++ = 0x80 | (c & 0x3f);
627 			} else if (c < 0xdc00)
628 				halfpair = c;
629 #if ALLOW_BROKEN_UNICODE
630 			else if (c < 0xe000) {
631 				*t++ = 0xe0 | (c >> 12);
632 				*t++ = 0x80 | ((c >> 6) & 0x3f);
633 				*t++ = 0x80 | (c & 0x3f);
634 			}
635 #endif /* ALLOW_BROKEN_UNICODE */
636 			else if (c >= 0xe000) {
637 				*t++ = 0xe0 | (c >> 12);
638 				*t++ = 0x80 | ((c >> 6) & 0x3f);
639 			        *t++ = 0x80 | (c & 0x3f);
640 			} else
641 				goto fail;
642 	        }
643 	}
644 #if ALLOW_BROKEN_UNICODE
645 	if (halfpair) { /* ending with a single surrogate */
646 		*t++ = 0xe0 | (halfpair >> 12);
647 		*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
648 		*t++ = 0x80 | (halfpair & 0x3f);
649 	}
650 #endif /* ALLOW_BROKEN_UNICODE */
651 	*t = '\0';
652 
653 #if defined(__APPLE__) || defined(__DARWIN__)
654 #ifdef ENABLE_NFCONV
655 	if(nfconvert_utf8 && (t - *outs) > 0) {
656 		char *new_outs = NULL;
657 		int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0); // Normalize to decomposed form
658 		if(new_outs_len >= 0 && new_outs != NULL) {
659 			if(original_outs_value != *outs) {
660 				// We have allocated outs ourselves.
661 				free(*outs);
662 				*outs = new_outs;
663 				t = *outs + new_outs_len;
664 			}
665 			else {
666 				// We need to copy new_outs into the fixed outs buffer.
667 				memset(*outs, 0, original_outs_len);
668 				strncpy(*outs, new_outs, original_outs_len-1);
669 				t = *outs + original_outs_len;
670 				free(new_outs);
671 			}
672 		}
673 		else {
674 			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
675 			ntfs_log_error("  new_outs=0x%p\n", new_outs);
676 			ntfs_log_error("  new_outs_len=%d\n", new_outs_len);
677 		}
678 	}
679 #endif /* ENABLE_NFCONV */
680 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
681 
682 	ret = t - *outs;
683 out:
684 	return ret;
685 fail:
686 	errno = EILSEQ;
687 	goto out;
688 }
689 
/*
 * Return the amount of 16-bit elements in UTF-16LE needed
 * (without the terminating null) to store given UTF-8 string.
 *
 * Every byte below 0xc0 counts for one element.  A lead byte (0xc0 and above)
 * counts for one element, or two when it is 0xf0 or above (a surrogate pair
 * is needed), and the bytes expected to follow it are skipped without
 * stepping over the terminating null.
 *
 * Return -1 with errno set to ENAMETOOLONG if the count reaches PATH_MAX, or
 * to EILSEQ if a lead byte is 0xf5 or above.
 *
 * Note: This does not check whether the input sequence is a valid utf8 string,
 *	 and should be used only in context where such check is made!
 */
static int utf8_to_utf16_size(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;
	size_t units = 0;
	unsigned int lead;

	for (lead = *p++; lead; lead = *p++) {
		if (++units >= PATH_MAX)
			goto too_long;
		/* Not a lead byte: nothing to skip. */
		if (lead < 0xc0)
			continue;
		if (lead >= 0xF5) {
			errno = EILSEQ;
			return -1;
		}
		/* Skip the first expected continuation byte. */
		if (!*p)
			break;
		p++;
		if (!*p)
			break;
		/* Three-byte sequences and above have a second one. */
		if (lead >= 0xE0)
			p++;
		if (!*p)
			break;
		/* Four-byte sequences have a third one and need a pair. */
		if (lead >= 0xF0) {
			p++;
			if (++units >= PATH_MAX)
				goto too_long;
		}
	}
	return units;

too_long:
	errno = ENAMETOOLONG;
	return -1;
}
737 /*
738  * This converts one UTF-8 sequence to cpu-endian Unicode value
739  * within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF
740  *
741  * Return the number of used utf8 bytes or -1 with errno set
742  * if sequence is invalid.
743  */
utf8_to_unicode(u32 * wc,const char * s)744 static int utf8_to_unicode(u32 *wc, const char *s)
745 {
746     	unsigned int byte = *((const unsigned char *)s);
747 
748 					/* single byte */
749 	if (byte == 0) {
750 		*wc = (u32) 0;
751 		return 0;
752 	} else if (byte < 0x80) {
753 		*wc = (u32) byte;
754 		return 1;
755 					/* double byte */
756 	} else if (byte < 0xc2) {
757 		goto fail;
758 	} else if (byte < 0xE0) {
759 		if ((s[1] & 0xC0) == 0x80) {
760 			*wc = ((u32)(byte & 0x1F) << 6)
761 			    | ((u32)(s[1] & 0x3F));
762 			return 2;
763 		} else
764 			goto fail;
765 					/* three-byte */
766 	} else if (byte < 0xF0) {
767 		if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) {
768 			*wc = ((u32)(byte & 0x0F) << 12)
769 			    | ((u32)(s[1] & 0x3F) << 6)
770 			    | ((u32)(s[2] & 0x3F));
771 			/* Check valid ranges */
772 #if ALLOW_BROKEN_UNICODE
773 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
774 			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
775 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
776 				return 3;
777 #else
778 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
779 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
780 				return 3;
781 #endif /* ALLOW_BROKEN_UNICODE */
782 		}
783 		goto fail;
784 					/* four-byte */
785 	} else if (byte < 0xF5) {
786 		if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)
787 		  && ((s[3] & 0xC0) == 0x80)) {
788 			*wc = ((u32)(byte & 0x07) << 18)
789 			    | ((u32)(s[1] & 0x3F) << 12)
790 			    | ((u32)(s[2] & 0x3F) << 6)
791 			    | ((u32)(s[3] & 0x3F));
792 			/* Check valid ranges */
793 			if ((*wc <= 0x10ffff) && (*wc >= 0x10000))
794 				return 4;
795 		}
796 		goto fail;
797 	}
798 fail:
799 	errno = EILSEQ;
800 	return -1;
801 }
802 
803 /**
804  * ntfs_utf8_to_utf16 - convert a UTF-8 string to a UTF-16LE string
805  * @ins:	input multibyte string buffer
806  * @outs:	on return contains the (allocated) output utf16 string
807  * @outs_len:	length of output buffer in utf16 characters
808  *
809  * Return -1 with errno set.
810  */
ntfs_utf8_to_utf16(const char * ins,ntfschar ** outs)811 static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
812 {
813 #if defined(__APPLE__) || defined(__DARWIN__)
814 #ifdef ENABLE_NFCONV
815 	char *new_ins = NULL;
816 	if(nfconvert_utf8) {
817 		int new_ins_len;
818 		new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form
819 		if(new_ins_len >= 0)
820 			ins = new_ins;
821 		else
822 			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
823 	}
824 #endif /* ENABLE_NFCONV */
825 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
826 	const char *t = ins;
827 	u32 wc;
828 	BOOL allocated;
829 	ntfschar *outpos;
830 	int shorts, ret = -1;
831 
832 	shorts = utf8_to_utf16_size(ins);
833 	if (shorts < 0)
834 		goto fail;
835 
836 	allocated = FALSE;
837 	if (!*outs) {
838 		*outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar));
839 		if (!*outs)
840 			goto fail;
841 		allocated = TRUE;
842 	}
843 
844 	outpos = *outs;
845 
846 	while(1) {
847 		int m  = utf8_to_unicode(&wc, t);
848 		if (m <= 0) {
849 			if (m < 0) {
850 				/* do not leave space allocated if failed */
851 				if (allocated) {
852 					free(*outs);
853 					*outs = (ntfschar*)NULL;
854 				}
855 				goto fail;
856 			}
857 			*outpos++ = const_cpu_to_le16(0);
858 			break;
859 		}
860 		if (wc < 0x10000)
861 			*outpos++ = cpu_to_le16(wc);
862 		else {
863 			wc -= 0x10000;
864 			*outpos++ = cpu_to_le16((wc >> 10) + 0xd800);
865 			*outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00);
866 		}
867 		t += m;
868 	}
869 
870 	ret = --outpos - *outs;
871 fail:
872 #if defined(__APPLE__) || defined(__DARWIN__)
873 #ifdef ENABLE_NFCONV
874 	if(new_ins != NULL)
875 		free(new_ins);
876 #endif /* ENABLE_NFCONV */
877 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
878 	return ret;
879 }
880 
881 /**
882  * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
883  * @ins:	input Unicode string buffer
884  * @ins_len:	length of input string in Unicode characters
885  * @outs:	on return contains the (allocated) output multibyte string
886  * @outs_len:	length of output buffer in bytes (ignored if *@outs is NULL)
887  *
888  * Convert the input little endian, 2-byte Unicode string @ins, of length
889  * @ins_len into the multibyte string format dictated by the current locale.
890  *
891  * If *@outs is NULL, the function allocates the string and the caller is
892  * responsible for calling free(*@outs); when finished with it.
893  *
894  * On success the function returns the number of bytes written to the output
895  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
896  * string buffer was allocated, *@outs is set to it.
897  *
898  * On error, -1 is returned, and errno is set to the error code. The following
899  * error codes can be expected:
900  *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
901  *	EILSEQ		The input string cannot be represented as a multibyte
902  *			sequence according to the current locale.
903  *	ENAMETOOLONG	Destination buffer is too small for input string.
904  *	ENOMEM		Not enough memory to allocate destination buffer.
905  */
ntfs_ucstombs(const ntfschar * ins,const int ins_len,char ** outs,int outs_len)906 int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
907 		int outs_len)
908 {
909 	char *mbs;
910 	int mbs_len;
911 #ifdef MB_CUR_MAX
912 	wchar_t wc;
913 	int i, o;
914 	int cnt = 0;
915 #ifdef HAVE_MBSINIT
916 	mbstate_t mbstate;
917 #endif
918 #endif /* MB_CUR_MAX */
919 
920 	if (!ins || !outs) {
921 		errno = EINVAL;
922 		return -1;
923 	}
924 	mbs = *outs;
925 	mbs_len = outs_len;
926 	if (mbs && !mbs_len) {
927 		errno = ENAMETOOLONG;
928 		return -1;
929 	}
930 	if (use_utf8)
931 		return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len);
932 #ifdef MB_CUR_MAX
933 	if (!mbs) {
934 		mbs_len = (ins_len + 1) * MB_CUR_MAX;
935 		mbs = ntfs_malloc(mbs_len);
936 		if (!mbs)
937 			return -1;
938 	}
939 #ifdef HAVE_MBSINIT
940 	memset(&mbstate, 0, sizeof(mbstate));
941 #else
942 	wctomb(NULL, 0);
943 #endif
944 	for (i = o = 0; i < ins_len; i++) {
945 		/* Reallocate memory if necessary or abort. */
946 		if ((int)(o + MB_CUR_MAX) > mbs_len) {
947 			char *tc;
948 			if (mbs == *outs) {
949 				errno = ENAMETOOLONG;
950 				return -1;
951 			}
952 			tc = ntfs_malloc((mbs_len + 64) & ~63);
953 			if (!tc)
954 				goto err_out;
955 			memcpy(tc, mbs, mbs_len);
956 			mbs_len = (mbs_len + 64) & ~63;
957 			free(mbs);
958 			mbs = tc;
959 		}
960 		/* Convert the LE Unicode character to a CPU wide character. */
961 		wc = (wchar_t)le16_to_cpu(ins[i]);
962 		if (!wc)
963 			break;
964 		/* Convert the CPU endian wide character to multibyte. */
965 #ifdef HAVE_MBSINIT
966 		cnt = wcrtomb(mbs + o, wc, &mbstate);
967 #else
968 		cnt = wctomb(mbs + o, wc);
969 #endif
970 		if (cnt == -1)
971 			goto err_out;
972 		if (cnt <= 0) {
973 			ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);
974 			errno = EINVAL;
975 			goto err_out;
976 		}
977 		o += cnt;
978 	}
979 #ifdef HAVE_MBSINIT
980 	/* Make sure we are back in the initial state. */
981 	if (!mbsinit(&mbstate)) {
982 		ntfs_log_debug("Eeek. mbstate not in initial state!\n");
983 		errno = EILSEQ;
984 		goto err_out;
985 	}
986 #endif
987 	/* Now write the NULL character. */
988 	mbs[o] = '\0';
989 	if (*outs != mbs)
990 		*outs = mbs;
991 	return o;
992 err_out:
993 	if (mbs != *outs) {
994 		int eo = errno;
995 		free(mbs);
996 		errno = eo;
997 	}
998 #else /* MB_CUR_MAX */
999 	errno = EILSEQ;
1000 #endif /* MB_CUR_MAX */
1001 	return -1;
1002 }
1003 
1004 /**
1005  * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
1006  * @ins:	input multibyte string buffer
1007  * @outs:	on return contains the (allocated) output Unicode string
1008  *
1009  * Convert the input multibyte string @ins, from the current locale into the
1010  * corresponding little endian, 2-byte Unicode string.
1011  *
1012  * The function allocates the string and the caller is responsible for calling
1013  * free(*@outs); when finished with it.
1014  *
1015  * On success the function returns the number of Unicode characters written to
1016  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
1017  * character.
1018  *
1019  * On error, -1 is returned, and errno is set to the error code. The following
1020  * error codes can be expected:
1021  *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
1022  *	EILSEQ		The input string cannot be represented as a Unicode
1023  *			string according to the current locale.
1024  *	ENAMETOOLONG	Destination buffer is too small for input string.
1025  *	ENOMEM		Not enough memory to allocate destination buffer.
1026  */
ntfs_mbstoucs(const char * ins,ntfschar ** outs)1027 int ntfs_mbstoucs(const char *ins, ntfschar **outs)
1028 {
1029 #ifdef MB_CUR_MAX
1030 	ntfschar *ucs;
1031 	const char *s;
1032 	wchar_t wc;
1033 	int i, o, cnt, ins_len, ucs_len, ins_size;
1034 #ifdef HAVE_MBSINIT
1035 	mbstate_t mbstate;
1036 #endif
1037 #endif /* MB_CUR_MAX */
1038 
1039 	if (!ins || !outs) {
1040 		errno = EINVAL;
1041 		return -1;
1042 	}
1043 
1044 	if (use_utf8)
1045 		return ntfs_utf8_to_utf16(ins, outs);
1046 
1047 #ifdef MB_CUR_MAX
1048 	/* Determine the size of the multi-byte string in bytes. */
1049 	ins_size = strlen(ins);
1050 	/* Determine the length of the multi-byte string. */
1051 	s = ins;
1052 #if defined(HAVE_MBSINIT)
1053 	memset(&mbstate, 0, sizeof(mbstate));
1054 	ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
1055 #ifdef __CYGWIN32__
1056 	if (!ins_len && *ins) {
1057 		/* Older Cygwin had broken mbsrtowcs() implementation. */
1058 		ins_len = strlen(ins);
1059 	}
1060 #endif
1061 #elif !defined(DJGPP)
1062 	ins_len = mbstowcs(NULL, s, 0);
1063 #else
1064 	/* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
1065 	ins_len = strlen(ins);
1066 #endif
1067 	if (ins_len == -1)
1068 		return ins_len;
1069 #ifdef HAVE_MBSINIT
1070 	if ((s != ins) || !mbsinit(&mbstate)) {
1071 #else
1072 	if (s != ins) {
1073 #endif
1074 		errno = EILSEQ;
1075 		return -1;
1076 	}
1077 	/* Add the NULL terminator. */
1078 	ins_len++;
1079 	ucs_len = ins_len;
1080 	ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
1081 	if (!ucs)
1082 		return -1;
1083 #ifdef HAVE_MBSINIT
1084 	memset(&mbstate, 0, sizeof(mbstate));
1085 #else
1086 	mbtowc(NULL, NULL, 0);
1087 #endif
1088 	for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
1089 		/* Reallocate memory if necessary. */
1090 		if (o >= ucs_len) {
1091 			ntfschar *tc;
1092 			ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
1093 			tc = realloc(ucs, ucs_len);
1094 			if (!tc)
1095 				goto err_out;
1096 			ucs = tc;
1097 			ucs_len /= sizeof(ntfschar);
1098 		}
1099 		/* Convert the multibyte character to a wide character. */
1100 #ifdef HAVE_MBSINIT
1101 		cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
1102 #else
1103 		cnt = mbtowc(&wc, ins + i, ins_size - i);
1104 #endif
1105 		if (!cnt)
1106 			break;
1107 		if (cnt == -1)
1108 			goto err_out;
1109 		if (cnt < -1) {
1110 			ntfs_log_trace("Eeek. cnt = %i\n", cnt);
1111 			errno = EINVAL;
1112 			goto err_out;
1113 		}
1114 		/* Make sure we are not overflowing the NTFS Unicode set. */
1115 		if ((unsigned long)wc >= (unsigned long)(1 <<
1116 				(8 * sizeof(ntfschar)))) {
1117 			errno = EILSEQ;
1118 			goto err_out;
1119 		}
1120 		/* Convert the CPU wide character to a LE Unicode character. */
1121 		ucs[o] = cpu_to_le16(wc);
1122 	}
1123 #ifdef HAVE_MBSINIT
1124 	/* Make sure we are back in the initial state. */
1125 	if (!mbsinit(&mbstate)) {
1126 		ntfs_log_trace("Eeek. mbstate not in initial state!\n");
1127 		errno = EILSEQ;
1128 		goto err_out;
1129 	}
1130 #endif
1131 	/* Now write the NULL character. */
1132 	ucs[o] = const_cpu_to_le16(L'\0');
1133 	*outs = ucs;
1134 	return o;
1135 err_out:
1136 	free(ucs);
1137 #else /* MB_CUR_MAX */
1138 	errno = EILSEQ;
1139 #endif /* MB_CUR_MAX */
1140 	return -1;
1141 }
1142 
1143 /*
1144  *		Turn a UTF8 name uppercase
1145  *
1146  *	Returns an allocated uppercase name which has to be freed by caller
1147  *	or NULL if there is an error (described by errno)
1148  */
1149 
1150 char *ntfs_uppercase_mbs(const char *low,
1151 			const ntfschar *upcase, u32 upcase_size)
1152 {
1153 	int size;
1154 	char *upp;
1155 	u32 wc;
1156 	int n;
1157 	const char *s;
1158 	char *t;
1159 
1160 	size = strlen(low);
1161 	upp = (char*)ntfs_malloc(3*size + 1);
1162 	if (upp) {
1163 		s = low;
1164 		t = upp;
1165 		do {
1166 			n = utf8_to_unicode(&wc, s);
1167 			if (n > 0) {
1168 				if (wc < upcase_size)
1169 					wc = le16_to_cpu(upcase[wc]);
1170 				if (wc < 0x80)
1171 					*t++ = wc;
1172 				else if (wc < 0x800) {
1173 					*t++ = (0xc0 | ((wc >> 6) & 0x3f));
1174 					*t++ = 0x80 | (wc & 0x3f);
1175 				} else if (wc < 0x10000) {
1176 					*t++ = 0xe0 | (wc >> 12);
1177 					*t++ = 0x80 | ((wc >> 6) & 0x3f);
1178 					*t++ = 0x80 | (wc & 0x3f);
1179 				} else {
1180 					*t++ = 0xf0 | ((wc >> 18) & 7);
1181 					*t++ = 0x80 | ((wc >> 12) & 63);
1182 					*t++ = 0x80 | ((wc >> 6) & 0x3f);
1183 					*t++ = 0x80 | (wc & 0x3f);
1184 				}
1185 			s += n;
1186 			}
1187 		} while (n > 0);
1188 		if (n < 0) {
1189 			free(upp);
1190 			upp = (char*)NULL;
1191 			errno = EILSEQ;
1192 		}
1193 		*t = 0;
1194 	}
1195 	return (upp);
1196 }
1197 
1198 /**
1199  * ntfs_upcase_table_build - build the default upcase table for NTFS
1200  * @uc:		destination buffer where to store the built table
1201  * @uc_len:	size of destination buffer in bytes
1202  *
1203  * ntfs_upcase_table_build() builds the default upcase table for NTFS and
1204  * stores it in the caller supplied buffer @uc of size @uc_len.
1205  *
1206  * Note, @uc_len must be at least 128kiB in size or bad things will happen!
1207  */
1208 void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)
1209 {
1210 	struct NEWUPPERCASE {
1211 		unsigned short first;
1212 		unsigned short last;
1213 		short diff;
1214 		unsigned char step;
1215 		unsigned char osmajor;
1216 		unsigned char osminor;
1217 	} ;
1218 
1219 	/*
1220 	 *	This is the table as defined by Windows XP
1221 	 */
1222 	static int uc_run_table[][3] = { /* Start, End, Add */
1223 	{0x0061, 0x007B,  -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72,  74},
1224 	{0x00E0, 0x00F7,  -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76,  86},
1225 	{0x00F8, 0x00FF,  -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
1226 	{0x0256, 0x0258, -205}, {0x1F00, 0x1F08,   8}, {0x1F78, 0x1F7A, 128},
1227 	{0x028A, 0x028C, -217}, {0x1F10, 0x1F16,   8}, {0x1F7A, 0x1F7C, 112},
1228 	{0x03AC, 0x03AD,  -38}, {0x1F20, 0x1F28,   8}, {0x1F7C, 0x1F7E, 126},
1229 	{0x03AD, 0x03B0,  -37}, {0x1F30, 0x1F38,   8}, {0x1FB0, 0x1FB2,   8},
1230 	{0x03B1, 0x03C2,  -32}, {0x1F40, 0x1F46,   8}, {0x1FD0, 0x1FD2,   8},
1231 	{0x03C2, 0x03C3,  -31}, {0x1F51, 0x1F52,   8}, {0x1FE0, 0x1FE2,   8},
1232 	{0x03C3, 0x03CC,  -32}, {0x1F53, 0x1F54,   8}, {0x1FE5, 0x1FE6,   7},
1233 	{0x03CC, 0x03CD,  -64}, {0x1F55, 0x1F56,   8}, {0x2170, 0x2180, -16},
1234 	{0x03CD, 0x03CF,  -63}, {0x1F57, 0x1F58,   8}, {0x24D0, 0x24EA, -26},
1235 	{0x0430, 0x0450,  -32}, {0x1F60, 0x1F68,   8}, {0xFF41, 0xFF5B, -32},
1236 	{0}
1237 	};
1238 	static int uc_dup_table[][2] = { /* Start, End */
1239 	{0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
1240 	{0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
1241 	{0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
1242 	{0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
1243 	{0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
1244 	{0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
1245 	{0}
1246 	};
1247 	static int uc_byte_table[][2] = { /* Offset, Value */
1248 	{0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
1249 	{0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
1250 	{0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
1251 	{0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
1252 	{0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
1253 	{0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
1254 	{0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
1255 	{0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
1256 	{0}
1257 	};
1258 
1259 /*
1260  *		Changes which were applied to later Windows versions
1261  *
1262  *   md5 for $UpCase from Winxp : 6fa3db2468275286210751e869d36373
1263  *                        Vista : 2f03b5a69d486ff3864cecbd07f24440
1264  *                        Win8 :  7ff498a44e45e77374cc7c962b1b92f2
1265  */
1266 	static const struct NEWUPPERCASE newuppercase[] = {
1267 						/* from Windows 6.0 (Vista) */
1268 		{ 0x37b, 0x37d, 0x82, 1, 6, 0 },
1269 		{ 0x1f80, 0x1f87, 0x8, 1, 6, 0 },
1270 		{ 0x1f90, 0x1f97, 0x8, 1, 6, 0 },
1271 		{ 0x1fa0, 0x1fa7, 0x8, 1, 6, 0 },
1272 		{ 0x2c30, 0x2c5e, -0x30, 1, 6, 0 },
1273 		{ 0x2d00, 0x2d25, -0x1c60, 1, 6, 0 },
1274 		{ 0x2c68, 0x2c6c, -0x1, 2, 6, 0 },
1275 		{ 0x219, 0x21f, -0x1, 2, 6, 0 },
1276 		{ 0x223, 0x233, -0x1, 2, 6, 0 },
1277 		{ 0x247, 0x24f, -0x1, 2, 6, 0 },
1278 		{ 0x3d9, 0x3e1, -0x1, 2, 6, 0 },
1279 		{ 0x48b, 0x48f, -0x1, 2, 6, 0 },
1280 		{ 0x4fb, 0x513, -0x1, 2, 6, 0 },
1281 		{ 0x2c81, 0x2ce3, -0x1, 2, 6, 0 },
1282 		{ 0x3f8, 0x3fb, -0x1, 3, 6, 0 },
1283 		{ 0x4c6, 0x4ce, -0x1, 4, 6, 0 },
1284 		{ 0x23c, 0x242, -0x1, 6, 6, 0 },
1285 		{ 0x4ed, 0x4f7, -0x1, 10, 6, 0 },
1286 		{ 0x450, 0x45d, -0x50, 13, 6, 0 },
1287 		{ 0x2c61, 0x2c76, -0x1, 21, 6, 0 },
1288 		{ 0x1fcc, 0x1ffc, -0x9, 48, 6, 0 },
1289 		{ 0x180, 0x180, 0xc3, 1, 6, 0 },
1290 		{ 0x195, 0x195, 0x61, 1, 6, 0 },
1291 		{ 0x19a, 0x19a, 0xa3, 1, 6, 0 },
1292 		{ 0x19e, 0x19e, 0x82, 1, 6, 0 },
1293 		{ 0x1bf, 0x1bf, 0x38, 1, 6, 0 },
1294 		{ 0x1f9, 0x1f9, -0x1, 1, 6, 0 },
1295 		{ 0x23a, 0x23a, 0x2a2b, 1, 6, 0 },
1296 		{ 0x23e, 0x23e, 0x2a28, 1, 6, 0 },
1297 		{ 0x26b, 0x26b, 0x29f7, 1, 6, 0 },
1298 		{ 0x27d, 0x27d, 0x29e7, 1, 6, 0 },
1299 		{ 0x280, 0x280, -0xda, 1, 6, 0 },
1300 		{ 0x289, 0x289, -0x45, 1, 6, 0 },
1301 		{ 0x28c, 0x28c, -0x47, 1, 6, 0 },
1302 		{ 0x3f2, 0x3f2, 0x7, 1, 6, 0 },
1303 		{ 0x4cf, 0x4cf, -0xf, 1, 6, 0 },
1304 		{ 0x1d7d, 0x1d7d, 0xee6, 1, 6, 0 },
1305 		{ 0x1fb3, 0x1fb3, 0x9, 1, 6, 0 },
1306 		{ 0x214e, 0x214e, -0x1c, 1, 6, 0 },
1307 		{ 0x2184, 0x2184, -0x1, 1, 6, 0 },
1308 						/* from Windows 6.1 (Win7) */
1309 		{ 0x23a, 0x23e,  0x0, 4, 6, 1 },
1310 		{ 0x250, 0x250,  0x2a1f, 2, 6, 1 },
1311 		{ 0x251, 0x251,  0x2a1c, 2, 6, 1 },
1312 		{ 0x271, 0x271,  0x29fd, 2, 6, 1 },
1313 		{ 0x371, 0x373, -0x1, 2, 6, 1 },
1314 		{ 0x377, 0x377, -0x1, 2, 6, 1 },
1315 		{ 0x3c2, 0x3c2,  0x0, 2, 6, 1 },
1316 		{ 0x3d7, 0x3d7, -0x8, 2, 6, 1 },
1317 		{ 0x515, 0x523, -0x1, 2, 6, 1 },
1318 			/* below, -0x75fc stands for 0x8a04 and truncation */
1319 		{ 0x1d79, 0x1d79, -0x75fc, 2, 6, 1 },
1320 		{ 0x1efb, 0x1eff, -0x1, 2, 6, 1 },
1321 		{ 0x1fc3, 0x1ff3,  0x9, 48, 6, 1 },
1322 		{ 0x1fcc, 0x1ffc,  0x0, 48, 6, 1 },
1323 		{ 0x2c65, 0x2c65, -0x2a2b, 2, 6, 1 },
1324 		{ 0x2c66, 0x2c66, -0x2a28, 2, 6, 1 },
1325 		{ 0x2c73, 0x2c73, -0x1, 2, 6, 1 },
1326 		{ 0xa641, 0xa65f, -0x1, 2, 6, 1 },
1327 		{ 0xa663, 0xa66d, -0x1, 2, 6, 1 },
1328 		{ 0xa681, 0xa697, -0x1, 2, 6, 1 },
1329 		{ 0xa723, 0xa72f, -0x1, 2, 6, 1 },
1330 		{ 0xa733, 0xa76f, -0x1, 2, 6, 1 },
1331 		{ 0xa77a, 0xa77c, -0x1, 2, 6, 1 },
1332 		{ 0xa77f, 0xa787, -0x1, 2, 6, 1 },
1333 		{ 0xa78c, 0xa78c, -0x1, 2, 6, 1 },
1334 							/* end mark */
1335 		{ 0 }
1336 	} ;
1337 
1338 	int i, r;
1339 	int k, off;
1340 	const struct NEWUPPERCASE *puc;
1341 
1342 	memset((char*)uc, 0, uc_len);
1343 	uc_len >>= 1;
1344 	if (uc_len > 65536)
1345 		uc_len = 65536;
1346 	for (i = 0; (u32)i < uc_len; i++)
1347 		uc[i] = cpu_to_le16(i);
1348 	for (r = 0; uc_run_table[r][0]; r++) {
1349 		off = uc_run_table[r][2];
1350 		for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
1351 			uc[i] = cpu_to_le16(i + off);
1352 	}
1353 	for (r = 0; uc_dup_table[r][0]; r++)
1354 		for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
1355 			uc[i + 1] = cpu_to_le16(i);
1356 	for (r = 0; uc_byte_table[r][0]; r++) {
1357 		k = uc_byte_table[r][1];
1358 		uc[uc_byte_table[r][0]] = cpu_to_le16(k);
1359 	}
1360 	for (r=0; newuppercase[r].first; r++) {
1361 		puc = &newuppercase[r];
1362 		if ((puc->osmajor < UPCASE_MAJOR)
1363 		  || ((puc->osmajor == UPCASE_MAJOR)
1364 		     && (puc->osminor <= UPCASE_MINOR))) {
1365 			off = puc->diff;
1366 			for (i = puc->first; i <= puc->last; i += puc->step)
1367 				uc[i] = cpu_to_le16(i + off);
1368 		}
1369 	}
1370 }
1371 
1372 /*
1373  *		Allocate and build the default upcase table
1374  *
1375  *	Returns the number of entries
1376  *		0 if failed
1377  */
1378 
1379 #define UPCASE_LEN 65536 /* default number of entries in upcase */
1380 
1381 u32 ntfs_upcase_build_default(ntfschar **upcase)
1382 {
1383 	u32 upcase_len = 0;
1384 
1385 	*upcase = (ntfschar*)ntfs_malloc(UPCASE_LEN*2);
1386 	if (*upcase) {
1387 		ntfs_upcase_table_build(*upcase, UPCASE_LEN*2);
1388 		upcase_len = UPCASE_LEN;
1389 	}
1390 	return (upcase_len);
1391 }
1392 
1393 /*
1394  *		Build a table for converting to lower case
1395  *
1396  *	This is only meaningful when there is a single lower case
1397  *	character leading to an upper case one, and currently the
1398  *	only exception is the greek letter sigma which has a single
1399  *	upper case glyph (code U+03A3), but two lower case glyphs
1400  *	(code U+03C3 and U+03C2, the latter to be used at the end
1401  *	of a word). In the following implementation the upper case
1402  *	sigma will be lowercased as U+03C3.
1403  */
1404 
1405 ntfschar *ntfs_locase_table_build(const ntfschar *uc, u32 uc_cnt)
1406 {
1407 	ntfschar *lc;
1408 	u32 upp;
1409 	u32 i;
1410 
1411 	lc = (ntfschar*)ntfs_malloc(uc_cnt*sizeof(ntfschar));
1412 	if (lc) {
1413 		for (i=0; i<uc_cnt; i++)
1414 			lc[i] = cpu_to_le16(i);
1415 		for (i=0; i<uc_cnt; i++) {
1416 			upp = le16_to_cpu(uc[i]);
1417 			if ((upp != i) && (upp < uc_cnt))
1418 				lc[upp] = cpu_to_le16(i);
1419 		}
1420 	} else
1421 		ntfs_log_error("Could not build the locase table\n");
1422 	return (lc);
1423 }
1424 
1425 /**
1426  * ntfs_str2ucs - convert a string to a valid NTFS file name
1427  * @s:		input string
1428  * @len:	length of output buffer in Unicode characters
1429  *
1430  * Convert the input @s string into the corresponding little endian,
1431  * 2-byte Unicode string. The length of the converted string is less
1432  * or equal to the maximum length allowed by the NTFS format (255).
1433  *
1434  * If @s is NULL then return AT_UNNAMED.
1435  *
1436  * On success the function returns the Unicode string in an allocated
1437  * buffer and the caller is responsible to free it when it's not needed
1438  * anymore.
1439  *
1440  * On error NULL is returned and errno is set to the error code.
1441  */
1442 ntfschar *ntfs_str2ucs(const char *s, int *len)
1443 {
1444 	ntfschar *ucs = NULL;
1445 
1446 	if (s && ((*len = ntfs_mbstoucs(s, &ucs)) == -1)) {
1447 		ntfs_log_perror("Couldn't convert '%s' to Unicode", s);
1448 		return NULL;
1449 	}
1450 	if (*len > NTFS_MAX_NAME_LEN) {
1451 		free(ucs);
1452 		errno = ENAMETOOLONG;
1453 		return NULL;
1454 	}
1455 	if (!ucs || !*len) {
1456 		ucs  = AT_UNNAMED;
1457 		*len = 0;
1458 	}
1459 	return ucs;
1460 }
1461 
1462 /**
1463  * ntfs_ucsfree - free memory allocated by ntfs_str2ucs()
1464  * @ucs		input string to be freed
1465  *
1466  * Free memory at @ucs and which was allocated by ntfs_str2ucs.
1467  *
1468  * Return value: none.
1469  */
1470 void ntfs_ucsfree(ntfschar *ucs)
1471 {
1472 	if (ucs && (ucs != AT_UNNAMED))
1473 		free(ucs);
1474 }
1475 
1476 /*
1477  *		Check whether a name contains no chars forbidden
1478  *	for DOS or Win32 use
1479  *
1480  *	If @strict is TRUE, then trailing dots and spaces are forbidden.
1481  *	These names are technically allowed in the Win32 namespace, but
1482  *	they can be problematic.  See comment for FILE_NAME_WIN32.
1483  *
1484  *	If there is a bad char, errno is set to EINVAL
1485  */
1486 
1487 BOOL ntfs_forbidden_chars(const ntfschar *name, int len, BOOL strict)
1488 {
1489 	BOOL forbidden;
1490 	int ch;
1491 	int i;
1492 	static const u32 mainset = (1L << ('\"' - 0x20))
1493 			| (1L << ('*' - 0x20))
1494 			| (1L << ('/' - 0x20))
1495 			| (1L << (':' - 0x20))
1496 			| (1L << ('<' - 0x20))
1497 			| (1L << ('>' - 0x20))
1498 			| (1L << ('?' - 0x20));
1499 
1500 	forbidden = (len == 0) ||
1501 		    (strict && (name[len-1] == const_cpu_to_le16(' ') ||
1502 				name[len-1] == const_cpu_to_le16('.')));
1503 	for (i=0; i<len; i++) {
1504 		ch = le16_to_cpu(name[i]);
1505 		if ((ch < 0x20)
1506 		    || ((ch < 0x40)
1507 			&& ((1L << (ch - 0x20)) & mainset))
1508 		    || (ch == '\\')
1509 		    || (ch == '|'))
1510 			forbidden = TRUE;
1511 	}
1512 	if (forbidden)
1513 		errno = EINVAL;
1514 	return (forbidden);
1515 }
1516 
1517 /*
1518  *		Check whether a name contains no forbidden chars and
1519  *	is not a reserved name for DOS or Win32 use
1520  *
1521  *	The reserved names are CON, PRN, AUX, NUL, COM1..COM9, LPT1..LPT9
1522  *	with no suffix or any suffix.
1523  *
1524  *	If @strict is TRUE, then trailing dots and spaces are forbidden.
1525  *	These names are technically allowed in the Win32 namespace, but
1526  *	they can be problematic.  See comment for FILE_NAME_WIN32.
1527  *
1528  *	If the name is forbidden, errno is set to EINVAL
1529  */
1530 
1531 BOOL ntfs_forbidden_names(ntfs_volume *vol, const ntfschar *name, int len,
1532 			  BOOL strict)
1533 {
1534 	BOOL forbidden;
1535 	int h;
1536 	static const ntfschar dot = const_cpu_to_le16('.');
1537 	static const ntfschar con[] = { const_cpu_to_le16('c'),
1538 			const_cpu_to_le16('o'), const_cpu_to_le16('n') };
1539 	static const ntfschar prn[] = { const_cpu_to_le16('p'),
1540 			const_cpu_to_le16('r'), const_cpu_to_le16('n') };
1541 	static const ntfschar aux[] = { const_cpu_to_le16('a'),
1542 			const_cpu_to_le16('u'), const_cpu_to_le16('x') };
1543 	static const ntfschar nul[] = { const_cpu_to_le16('n'),
1544 			const_cpu_to_le16('u'), const_cpu_to_le16('l') };
1545 	static const ntfschar com[] = { const_cpu_to_le16('c'),
1546 			const_cpu_to_le16('o'), const_cpu_to_le16('m') };
1547 	static const ntfschar lpt[] = { const_cpu_to_le16('l'),
1548 			const_cpu_to_le16('p'), const_cpu_to_le16('t') };
1549 
1550 	forbidden = ntfs_forbidden_chars(name, len, strict);
1551 	if (!forbidden && (len >= 3)) {
1552 		/*
1553 		 * Rough hash check to tell whether the first couple of chars
1554 		 * may be one of CO PR AU NU LP or lowercase variants.
1555 		 */
1556 		h = ((le16_to_cpu(name[0]) & 31)*48)
1557 				^ ((le16_to_cpu(name[1]) & 31)*165);
1558 		if ((h % 23) == 17) {
1559 			/* do a full check, depending on the third char */
1560 			switch (le16_to_cpu(name[2]) & ~0x20) {
1561 			case 'N' :
1562 				if (((len == 3) || (name[3] == dot))
1563 				    && (!ntfs_ucsncasecmp(name, con, 3,
1564 						vol->upcase, vol->upcase_len)
1565 					|| !ntfs_ucsncasecmp(name, prn, 3,
1566 						vol->upcase, vol->upcase_len)))
1567 					forbidden = TRUE;
1568 				break;
1569 			case 'X' :
1570 				if (((len == 3) || (name[3] == dot))
1571 				    && !ntfs_ucsncasecmp(name, aux, 3,
1572 						vol->upcase, vol->upcase_len))
1573 					forbidden = TRUE;
1574 				break;
1575 			case 'L' :
1576 				if (((len == 3) || (name[3] == dot))
1577 				    && !ntfs_ucsncasecmp(name, nul, 3,
1578 						vol->upcase, vol->upcase_len))
1579 					forbidden = TRUE;
1580 				break;
1581 			case 'M' :
1582 				if ((len > 3)
1583 				    && (le16_to_cpu(name[3]) >= '1')
1584 				    && (le16_to_cpu(name[3]) <= '9')
1585 				    && ((len == 4) || (name[4] == dot))
1586 				    && !ntfs_ucsncasecmp(name, com, 3,
1587 						vol->upcase, vol->upcase_len))
1588 					forbidden = TRUE;
1589 				break;
1590 			case 'T' :
1591 				if ((len > 3)
1592 				    && (le16_to_cpu(name[3]) >= '1')
1593 				    && (le16_to_cpu(name[3]) <= '9')
1594 				    && ((len == 4) || (name[4] == dot))
1595 				    && !ntfs_ucsncasecmp(name, lpt, 3,
1596 						vol->upcase, vol->upcase_len))
1597 					forbidden = TRUE;
1598 				break;
1599 			}
1600 		}
1601 	}
1602 
1603 	if (forbidden)
1604 		errno = EINVAL;
1605 	return (forbidden);
1606 }
1607 
1608 /*
1609  *		Check whether the same name can be used as a DOS and
1610  *	a Win32 name
1611  *
1612  *	The names must be the same, or the short name the uppercase
1613  *	variant of the long name
1614  */
1615 
1616 BOOL ntfs_collapsible_chars(ntfs_volume *vol,
1617 			const ntfschar *shortname, int shortlen,
1618 			const ntfschar *longname, int longlen)
1619 {
1620 	BOOL collapsible;
1621 	unsigned int ch;
1622 	unsigned int cs;
1623 	int i;
1624 
1625 	collapsible = shortlen == longlen;
1626 	for (i=0; collapsible && (i<shortlen); i++) {
1627 		ch = le16_to_cpu(longname[i]);
1628 		cs = le16_to_cpu(shortname[i]);
1629 		if ((cs != ch)
1630 		    && ((ch >= vol->upcase_len)
1631 			|| (cs >= vol->upcase_len)
1632 			|| (vol->upcase[cs] != vol->upcase[ch])))
1633 				collapsible = FALSE;
1634 	}
1635 	return (collapsible);
1636 }
1637 
1638 /*
1639  * Define the character encoding to be used.
1640  * Use UTF-8 unless specified otherwise.
1641  */
1642 
1643 int ntfs_set_char_encoding(const char *locale)
1644 {
1645 	use_utf8 = 0;
1646 	if (!locale || strstr(locale,"utf8") || strstr(locale,"UTF8")
1647 	    || strstr(locale,"utf-8") || strstr(locale,"UTF-8"))
1648 		use_utf8 = 1;
1649 	else
1650 		if (setlocale(LC_ALL, locale))
1651 			use_utf8 = 0;
1652 		else {
1653 			ntfs_log_error("Invalid locale, encoding to UTF-8\n");
1654 			use_utf8 = 1;
1655 	 	}
1656 	return 0; /* always successful */
1657 }
1658 
1659 #if defined(__APPLE__) || defined(__DARWIN__)
1660 
/*
 *		Enable or disable the normalization of file names
 *
 *	@normalize must be 0 or 1, it is recorded into nfconvert_utf8.
 *
 *	Returns 0 if the setting was recorded,
 *		-1 if @normalize is neither 0 nor 1, or if the support
 *		for normalization (ENABLE_NFCONV) was not compiled in
 */
int ntfs_macosx_normalize_filenames(int normalize) {
#ifdef ENABLE_NFCONV
	if ((normalize != 0) && (normalize != 1))
		return -1;
	nfconvert_utf8 = normalize;
	return 0;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1674 
/*
 *		Normalize an UTF-8 string through CoreFoundation
 *
 *	The string @utf8_string is normalized to form C if @composed
 *	is not zero, and to form D otherwise. The result is returned
 *	through @target in an allocated '\0'-terminated buffer, which
 *	is only set on success.
 *
 *	Returns the length in bytes of the result (terminator excluded),
 *		-2 if the input could not be converted to a CFString,
 *		-3 if a mutable copy could not be created,
 *		-1 for other failures, or if the support for
 *		normalization (ENABLE_NFCONV) was not compiled in
 */
int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
		int composed)
{
#ifdef ENABLE_NFCONV
	/* For this code to compile, the CoreFoundation framework must be fed to
	 * the linker. */
	CFStringRef source;
	CFMutableStringRef work;
	CFRange whole;
	CFIndex needed;
	char *buf = NULL;
	int buflen = -1;

	/* Convert the UTF-8 string to a CFString. */
	source = CFStringCreateWithCString(kCFAllocatorDefault,
		utf8_string, kCFStringEncodingUTF8);
	if (source == NULL) {
		ntfs_log_error("CFStringCreateWithCString failed!\n");
		return -2;
	}

	/* Get a private copy which may be modified, the original is
	 * not needed any more. */
	work = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, source);
	CFRelease(source);
	if (work == NULL) {
		ntfs_log_error("CFStringCreateMutableCopy failed!\n");
		return -3;
	}

	/* Normalize the copy in place to the requested form. */
	CFStringNormalize(work, (composed != 0 ?
		kCFStringNormalizationFormC : kCFStringNormalizationFormD));

	/* First pass, with no buffer : only get the needed size. */
	whole = CFRangeMake(0, CFStringGetLength(work));
	if (CFStringGetBytes(work, whole, kCFStringEncodingUTF8, 0, false,
		NULL, 0, &needed) <= 0)
	{
		ntfs_log_error("Could not perform check for required length of "
			"UTF-8 conversion of normalized CFMutableString.\n");
		goto release;
	}

	/* One more byte for the '\0' terminator, the buffer being
	 * allocated through ntfs_calloc(). */
	buflen = sizeof(char) * (needed + 1);
	buf = ntfs_calloc(buflen);
	if (buf == NULL) {
		ntfs_log_error("Could not perform a ntfs_calloc of %d "
			"bytes for char *result.\n", buflen);
		goto release;
	}

	/* Second pass : do the conversion into the buffer. */
	if (CFStringGetBytes(work, whole, kCFStringEncodingUTF8, 0, false,
		(UInt8*) buf, buflen - 1, &needed) <= 0)
	{
		ntfs_log_error("Could not perform UTF-8 "
			"conversion of normalized "
			"CFMutableString.\n");
		free(buf);
		buf = NULL;
	}

release:
	CFRelease(work);

	if (buf == NULL)
		return -1;
	*target = buf;
	return buflen - 1;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1756 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
1757