• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  linux/fs/hfsplus/unicode.c
3  *
4  * Copyright (C) 2001
5  * Brad Boyer (flar@allandria.com)
6  * (C) 2003 Ardis Technologies <roman@ardistech.com>
7  *
8  * Handler routines for unicode strings
9  */
10 
11 #include <linux/types.h>
12 #include <linux/nls.h>
13 #include "hfsplus_fs.h"
14 #include "hfsplus_raw.h"
15 
16 /* Fold the case of a unicode char, given the 16 bit value */
17 /* Returns folded char, or 0 if ignorable */
case_fold(u16 c)18 static inline u16 case_fold(u16 c)
19 {
20         u16 tmp;
21 
22         tmp = hfsplus_case_fold_table[c >> 8];
23         if (tmp)
24                 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
25         else
26                 tmp = c;
27         return tmp;
28 }
29 
30 /* Compare unicode strings, return values like normal strcmp */
hfsplus_strcasecmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)31 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
32 		       const struct hfsplus_unistr *s2)
33 {
34 	u16 len1, len2, c1, c2;
35 	const hfsplus_unichr *p1, *p2;
36 
37 	len1 = be16_to_cpu(s1->length);
38 	len2 = be16_to_cpu(s2->length);
39 	p1 = s1->unicode;
40 	p2 = s2->unicode;
41 
42 	while (1) {
43 		c1 = c2 = 0;
44 
45 		while (len1 && !c1) {
46 			c1 = case_fold(be16_to_cpu(*p1));
47 			p1++;
48 			len1--;
49 		}
50 		while (len2 && !c2) {
51 			c2 = case_fold(be16_to_cpu(*p2));
52 			p2++;
53 			len2--;
54 		}
55 
56 		if (c1 != c2)
57 			return (c1 < c2) ? -1 : 1;
58 		if (!c1 && !c2)
59 			return 0;
60 	}
61 }
62 
63 /* Compare names as a sequence of 16-bit unsigned integers */
hfsplus_strcmp(const struct hfsplus_unistr * s1,const struct hfsplus_unistr * s2)64 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
65 		   const struct hfsplus_unistr *s2)
66 {
67 	u16 len1, len2, c1, c2;
68 	const hfsplus_unichr *p1, *p2;
69 	int len;
70 
71 	len1 = be16_to_cpu(s1->length);
72 	len2 = be16_to_cpu(s2->length);
73 	p1 = s1->unicode;
74 	p2 = s2->unicode;
75 
76 	for (len = min(len1, len2); len > 0; len--) {
77 		c1 = be16_to_cpu(*p1);
78 		c2 = be16_to_cpu(*p2);
79 		if (c1 != c2)
80 			return c1 < c2 ? -1 : 1;
81 		p1++;
82 		p2++;
83 	}
84 
85 	return len1 < len2 ? -1 :
86 	       len1 > len2 ? 1 : 0;
87 }
88 
89 
/*
 * Constants for arithmetic Hangul syllable composition, as used by
 * hfsplus_uni2asc(): a syllable is built from a leading (L), a vowel (V)
 * and an optional trailing (T) code unit.
 */
#define Hangul_SBase	0xac00	/* first composed syllable */
#define Hangul_LBase	0x1100	/* first leading unit */
#define Hangul_VBase	0x1161	/* first vowel unit */
#define Hangul_TBase	0x11a7	/* one below the first trailing unit */
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)	/* 11172 */
99 
100 
/*
 * Binary-search one node of the composition table for the code unit @cc.
 *
 * A node is laid out as 16-bit pairs: p[1] holds the number of entries,
 * and entry i (1-based) has its key at p[i * 2] and the offset of its
 * child node, relative to hfsplus_compose_table, at p[i * 2 + 1].  Keys
 * are expected to be sorted in ascending order.
 *
 * Returns a pointer to the child node, or NULL if @cc is not present.
 */
static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
{
	int lo = 1;
	int hi = p[1];
	int mid;
	u16 key;

	/* empty node, or @cc outside the [first key, last key] range */
	if (!hi)
		return NULL;
	if (cc < p[lo * 2] || cc > p[hi * 2])
		return NULL;

	while (lo <= hi) {
		mid = (lo + hi) / 2;
		key = p[mid * 2];
		if (key == cc)
			return hfsplus_compose_table + p[mid * 2 + 1];
		if (cc > key)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}
120 
hfsplus_uni2asc(struct super_block * sb,const struct hfsplus_unistr * ustr,char * astr,int * len_p)121 int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p)
122 {
123 	const hfsplus_unichr *ip;
124 	struct nls_table *nls = HFSPLUS_SB(sb).nls;
125 	u8 *op;
126 	u16 cc, c0, c1;
127 	u16 *ce1, *ce2;
128 	int i, len, ustrlen, res, compose;
129 
130 	op = astr;
131 	ip = ustr->unicode;
132 	ustrlen = be16_to_cpu(ustr->length);
133 	len = *len_p;
134 	ce1 = NULL;
135 	compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
136 
137 	while (ustrlen > 0) {
138 		c0 = be16_to_cpu(*ip++);
139 		ustrlen--;
140 		/* search for single decomposed char */
141 		if (likely(compose))
142 			ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
143 		if (ce1 && (cc = ce1[0])) {
144 			/* start of a possibly decomposed Hangul char */
145 			if (cc != 0xffff)
146 				goto done;
147 			if (!ustrlen)
148 				goto same;
149 			c1 = be16_to_cpu(*ip) - Hangul_VBase;
150 			if (c1 < Hangul_VCount) {
151 				/* compose the Hangul char */
152 				cc = (c0 - Hangul_LBase) * Hangul_VCount;
153 				cc = (cc + c1) * Hangul_TCount;
154 				cc += Hangul_SBase;
155 				ip++;
156 				ustrlen--;
157 				if (!ustrlen)
158 					goto done;
159 				c1 = be16_to_cpu(*ip) - Hangul_TBase;
160 				if (c1 > 0 && c1 < Hangul_TCount) {
161 					cc += c1;
162 					ip++;
163 					ustrlen--;
164 				}
165 				goto done;
166 			}
167 		}
168 		while (1) {
169 			/* main loop for common case of not composed chars */
170 			if (!ustrlen)
171 				goto same;
172 			c1 = be16_to_cpu(*ip);
173 			if (likely(compose))
174 				ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c1);
175 			if (ce1)
176 				break;
177 			switch (c0) {
178 			case 0:
179 				c0 = 0x2400;
180 				break;
181 			case '/':
182 				c0 = ':';
183 				break;
184 			}
185 			res = nls->uni2char(c0, op, len);
186 			if (res < 0) {
187 				if (res == -ENAMETOOLONG)
188 					goto out;
189 				*op = '?';
190 				res = 1;
191 			}
192 			op += res;
193 			len -= res;
194 			c0 = c1;
195 			ip++;
196 			ustrlen--;
197 		}
198 		ce2 = hfsplus_compose_lookup(ce1, c0);
199 		if (ce2) {
200 			i = 1;
201 			while (i < ustrlen) {
202 				ce1 = hfsplus_compose_lookup(ce2, be16_to_cpu(ip[i]));
203 				if (!ce1)
204 					break;
205 				i++;
206 				ce2 = ce1;
207 			}
208 			if ((cc = ce2[0])) {
209 				ip += i;
210 				ustrlen -= i;
211 				goto done;
212 			}
213 		}
214 	same:
215 		switch (c0) {
216 		case 0:
217 			cc = 0x2400;
218 			break;
219 		case '/':
220 			cc = ':';
221 			break;
222 		default:
223 			cc = c0;
224 		}
225 	done:
226 		res = nls->uni2char(cc, op, len);
227 		if (res < 0) {
228 			if (res == -ENAMETOOLONG)
229 				goto out;
230 			*op = '?';
231 			res = 1;
232 		}
233 		op += res;
234 		len -= res;
235 	}
236 	res = 0;
237 out:
238 	*len_p = (char *)op - astr;
239 	return res;
240 }
241 
242 /*
243  * Convert one or more ASCII characters into a single unicode character.
244  * Returns the number of ASCII characters corresponding to the unicode char.
245  */
asc2unichar(struct super_block * sb,const char * astr,int len,wchar_t * uc)246 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
247 			      wchar_t *uc)
248 {
249 	int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc);
250 	if (size <= 0) {
251 		*uc = '?';
252 		size = 1;
253 	}
254 	switch (*uc) {
255 	case 0x2400:
256 		*uc = 0;
257 		break;
258 	case ':':
259 		*uc = '/';
260 		break;
261 	}
262 	return size;
263 }
264 
265 /* Decomposes a single unicode character. */
decompose_unichar(wchar_t uc,int * size)266 static inline u16 *decompose_unichar(wchar_t uc, int *size)
267 {
268 	int off;
269 
270 	off = hfsplus_decompose_table[(uc >> 12) & 0xf];
271 	if (off == 0 || off == 0xffff)
272 		return NULL;
273 
274 	off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
275 	if (!off)
276 		return NULL;
277 
278 	off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
279 	if (!off)
280 		return NULL;
281 
282 	off = hfsplus_decompose_table[off + (uc & 0xf)];
283 	*size = off & 3;
284 	if (*size == 0)
285 		return NULL;
286 	return hfsplus_decompose_table + (off / 4);
287 }
288 
hfsplus_asc2uni(struct super_block * sb,struct hfsplus_unistr * ustr,const char * astr,int len)289 int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
290 		    const char *astr, int len)
291 {
292 	int size, dsize, decompose;
293 	u16 *dstr, outlen = 0;
294 	wchar_t c;
295 
296 	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
297 	while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
298 		size = asc2unichar(sb, astr, len, &c);
299 
300 		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
301 			if (outlen + dsize > HFSPLUS_MAX_STRLEN)
302 				break;
303 			do {
304 				ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
305 			} while (--dsize > 0);
306 		} else
307 			ustr->unicode[outlen++] = cpu_to_be16(c);
308 
309 		astr += size;
310 		len -= size;
311 	}
312 	ustr->length = cpu_to_be16(outlen);
313 	if (len > 0)
314 		return -ENAMETOOLONG;
315 	return 0;
316 }
317 
318 /*
319  * Hash a string to an integer as appropriate for the HFS+ filesystem.
320  * Composed unicode characters are decomposed and case-folding is performed
321  * if the appropriate bits are (un)set on the superblock.
322  */
hfsplus_hash_dentry(struct dentry * dentry,struct qstr * str)323 int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
324 {
325 	struct super_block *sb = dentry->d_sb;
326 	const char *astr;
327 	const u16 *dstr;
328 	int casefold, decompose, size, len;
329 	unsigned long hash;
330 	wchar_t c;
331 	u16 c2;
332 
333 	casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
334 	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
335 	hash = init_name_hash();
336 	astr = str->name;
337 	len = str->len;
338 	while (len > 0) {
339 		int uninitialized_var(dsize);
340 		size = asc2unichar(sb, astr, len, &c);
341 		astr += size;
342 		len -= size;
343 
344 		if (decompose && (dstr = decompose_unichar(c, &dsize))) {
345 			do {
346 				c2 = *dstr++;
347 				if (!casefold || (c2 = case_fold(c2)))
348 					hash = partial_name_hash(c2, hash);
349 			} while (--dsize > 0);
350 		} else {
351 			c2 = c;
352 			if (!casefold || (c2 = case_fold(c2)))
353 				hash = partial_name_hash(c2, hash);
354 		}
355 	}
356 	str->hash = end_name_hash(hash);
357 
358 	return 0;
359 }
360 
361 /*
362  * Compare strings with HFS+ filename ordering.
363  * Composed unicode characters are decomposed and case-folding is performed
364  * if the appropriate bits are (un)set on the superblock.
365  */
hfsplus_compare_dentry(struct dentry * dentry,struct qstr * s1,struct qstr * s2)366 int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2)
367 {
368 	struct super_block *sb = dentry->d_sb;
369 	int casefold, decompose, size;
370 	int dsize1, dsize2, len1, len2;
371 	const u16 *dstr1, *dstr2;
372 	const char *astr1, *astr2;
373 	u16 c1, c2;
374 	wchar_t c;
375 
376 	casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD);
377 	decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE);
378 	astr1 = s1->name;
379 	len1 = s1->len;
380 	astr2 = s2->name;
381 	len2 = s2->len;
382 	dsize1 = dsize2 = 0;
383 	dstr1 = dstr2 = NULL;
384 
385 	while (len1 > 0 && len2 > 0) {
386 		if (!dsize1) {
387 			size = asc2unichar(sb, astr1, len1, &c);
388 			astr1 += size;
389 			len1 -= size;
390 
391 			if (!decompose || !(dstr1 = decompose_unichar(c, &dsize1))) {
392 				c1 = c;
393 				dstr1 = &c1;
394 				dsize1 = 1;
395 			}
396 		}
397 
398 		if (!dsize2) {
399 			size = asc2unichar(sb, astr2, len2, &c);
400 			astr2 += size;
401 			len2 -= size;
402 
403 			if (!decompose || !(dstr2 = decompose_unichar(c, &dsize2))) {
404 				c2 = c;
405 				dstr2 = &c2;
406 				dsize2 = 1;
407 			}
408 		}
409 
410 		c1 = *dstr1;
411 		c2 = *dstr2;
412 		if (casefold) {
413 			if  (!(c1 = case_fold(c1))) {
414 				dstr1++;
415 				dsize1--;
416 				continue;
417 			}
418 			if (!(c2 = case_fold(c2))) {
419 				dstr2++;
420 				dsize2--;
421 				continue;
422 			}
423 		}
424 		if (c1 < c2)
425 			return -1;
426 		else if (c1 > c2)
427 			return 1;
428 
429 		dstr1++;
430 		dsize1--;
431 		dstr2++;
432 		dsize2--;
433 	}
434 
435 	if (len1 < len2)
436 		return -1;
437 	if (len1 > len2)
438 		return 1;
439 	return 0;
440 }
441