• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /***
2   This file is part of PulseAudio.
3 
4   Copyright 2006 Lennart Poettering
5   Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB
6 
7   PulseAudio is free software; you can redistribute it and/or modify
8   it under the terms of the GNU Lesser General Public License as
9   published by the Free Software Foundation; either version 2.1 of the
10   License, or (at your option) any later version.
11 
12   PulseAudio is distributed in the hope that it will be useful, but
13   WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15   Lesser General Public License for more details.
16 
17   You should have received a copy of the GNU Lesser General Public
18   License along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
19 ***/
20 
21 /* This file is based on the GLIB utf8 validation functions. The
22  * original license text follows. */
23 
24 /* gutf8.c - Operations on UTF-8 strings.
25  *
26  * Copyright (C) 1999 Tom Tromey
27  * Copyright (C) 2000 Red Hat, Inc.
28  *
29  * This library is free software; you can redistribute it and/or
30  * modify it under the terms of the GNU Lesser General Public
31  * License as published by the Free Software Foundation; either
32  * version 2 of the License, or (at your option) any later version.
33  *
34  * This library is distributed in the hope that it will be useful,
35  * but WITHOUT ANY WARRANTY; without even the implied warranty of
36  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
37  * Lesser General Public License for more details.
38  *
39  * You should have received a copy of the GNU Lesser General Public
40  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
41  */
42 
43 #ifdef HAVE_CONFIG_H
44 #include <config.h>
45 #endif
46 
47 #include <errno.h>
48 #include <stdlib.h>
49 #include <inttypes.h>
50 #include <string.h>
51 
52 #ifdef HAVE_ICONV
53 #include <iconv.h>
54 #endif
55 
56 #include <pulse/xmalloc.h>
57 #include <pulsecore/macro.h>
58 
59 #include "utf8.h"
60 
61 #define FILTER_CHAR '_'
62 
/* Decides whether ch is a code point this file accepts inside UTF-8 text.
 *
 * Returns false for values at or above 0x110000, for the UTF-16 surrogate
 * range 0xD800..0xDFFF, for the reserved range 0xFDD0..0xFDEF and for any
 * value whose low 16 bits are 0xFFFE or 0xFFFF; returns true otherwise. */
static inline bool is_unicode_valid(uint32_t ch) {
    /* Past the end of the Unicode code space */
    const bool beyond_unicode = ch >= 0x110000;
    /* 0xD800..0xDFFF: reserved for UTF-16 surrogates */
    const bool surrogate = (ch & 0xFFFFF800) == 0xD800;
    /* 0xFDD0..0xFDEF: reserved */
    const bool reserved_range = ch >= 0xFDD0 && ch <= 0xFDEF;
    /* xxFFFE / xxFFFF (this includes the byte-swapped BOM 0xFFFE) */
    const bool ends_in_fffe_or_ffff = (ch & 0xFFFE) == 0xFFFE;

    return !(beyond_unicode || surrogate || reserved_range || ends_in_fffe_or_ffff);
}
76 
/* Returns true when ch has the bit pattern 10xxxxxx, i.e. the form of a
 * trailing byte in a multi-byte UTF-8 sequence. */
static inline bool is_continuation_char(uint8_t ch) {
    return (ch & 0xc0) == 0x80;
}
82 
/* Appends the six payload bits of the continuation byte ch to the code point
 * being assembled in *u_ch: the accumulator is shifted left by six and the
 * low six bits of ch are OR-ed in. */
static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
    *u_ch = (*u_ch << 6) | (uint32_t) (ch & 0x3f);
}
87 
utf8_validate(const char * str,char * output)88 static char* utf8_validate(const char *str, char *output) {
89     uint32_t val = 0;
90     uint32_t min = 0;
91     const uint8_t *p, *last;
92     int size;
93     uint8_t *o;
94 
95     pa_assert(str);
96 
97     o = (uint8_t*) output;
98     for (p = (const uint8_t*) str; *p; p++) {
99         if (*p < 128) {
100             if (o)
101                 *o = *p;
102         } else {
103             last = p;
104 
105             if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
106                 size = 2;
107                 min = 128;
108                 val = (uint32_t) (*p & 0x1e);
109                 goto ONE_REMAINING;
110             } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
111                 size = 3;
112                 min = (1 << 11);
113                 val = (uint32_t) (*p & 0x0f);
114                 goto TWO_REMAINING;
115             } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
116                 size = 4;
117                 min = (1 << 16);
118                 val = (uint32_t) (*p & 0x07);
119             } else
120                 goto error;
121 
122             p++;
123             if (!is_continuation_char(*p))
124                 goto error;
125             merge_continuation_char(&val, *p);
126 
127 TWO_REMAINING:
128             p++;
129             if (!is_continuation_char(*p))
130                 goto error;
131             merge_continuation_char(&val, *p);
132 
133 ONE_REMAINING:
134             p++;
135             if (!is_continuation_char(*p))
136                 goto error;
137             merge_continuation_char(&val, *p);
138 
139             if (val < min)
140                 goto error;
141 
142             if (!is_unicode_valid(val))
143                 goto error;
144 
145             if (o) {
146                 memcpy(o, last, (size_t) size);
147                 o += size;
148             }
149 
150             continue;
151 
152 error:
153             if (o) {
154                 *o = FILTER_CHAR;
155                 p = last; /* We retry at the next character */
156             } else
157                 goto failure;
158         }
159 
160         if (o)
161             o++;
162     }
163 
164     if (o) {
165         *o = '\0';
166         return output;
167     }
168 
169     return (char*) str;
170 
171 failure:
172     return NULL;
173 }
174 
/* Checks whether str is valid UTF-8 by running utf8_validate() without an
 * output buffer. Returns str itself when it is valid and NULL otherwise; no
 * memory is allocated. */
char* pa_utf8_valid (const char *str) {
    char *verdict;

    verdict = utf8_validate(str, NULL);

    return verdict;
}
178 
/* Returns a newly allocated copy of str (obtained from pa_xmalloc()) in which
 * utf8_validate() has replaced bytes that are not part of valid UTF-8 with
 * FILTER_CHAR. str must not be NULL. */
char* pa_utf8_filter (const char *str) {
    size_t capacity;
    char *filtered;

    pa_assert(str);

    /* Room for every input byte plus the terminating NUL */
    capacity = strlen(str) + 1;
    filtered = pa_xmalloc(capacity);

    return utf8_validate(str, filtered);
}
186 
187 #ifdef HAVE_ICONV
188 
iconv_simple(const char * str,const char * to,const char * from)189 static char* iconv_simple(const char *str, const char *to, const char *from) {
190     char *new_str;
191     size_t len, inlen;
192     iconv_t cd;
193     ICONV_CONST char *inbuf;
194     char *outbuf;
195     size_t res, inbytes, outbytes;
196 
197     pa_assert(str);
198     pa_assert(to);
199     pa_assert(from);
200 
201     cd = iconv_open(to, from);
202     if (cd == (iconv_t)-1)
203         return NULL;
204 
205     inlen = len = strlen(str) + 1;
206     new_str = pa_xmalloc(len);
207 
208     for (;;) {
209         inbuf = (ICONV_CONST char*) str; /* Brain dead prototype for iconv() */
210         inbytes = inlen;
211         outbuf = new_str;
212         outbytes = len;
213 
214         res = iconv(cd, &inbuf, &inbytes, &outbuf, &outbytes);
215 
216         if (res != (size_t)-1)
217             break;
218 
219         if (errno != E2BIG) {
220             pa_xfree(new_str);
221             new_str = NULL;
222             break;
223         }
224 
225         pa_assert(inbytes != 0);
226 
227         len += inbytes;
228         new_str = pa_xrealloc(new_str, len);
229     }
230 
231     iconv_close(cd);
232 
233     return new_str;
234 }
235 
/* Converts str from UTF-8 to the encoding named by the empty string via
 * iconv_simple(); presumably iconv resolves "" to the current locale's
 * charset -- confirm for the target platform. Returns the newly allocated
 * result of iconv_simple(), or NULL if the conversion failed. */
char* pa_utf8_to_locale (const char *str) {
    static const char source_charset[] = "UTF-8";
    static const char target_charset[] = "";

    return iconv_simple(str, target_charset, source_charset);
}
239 
/* Converts str from the encoding named by the empty string to UTF-8 via
 * iconv_simple(); presumably iconv resolves "" to the current locale's
 * charset -- confirm for the target platform. Returns the newly allocated
 * result of iconv_simple(), or NULL if the conversion failed. */
char* pa_locale_to_utf8 (const char *str) {
    static const char source_charset[] = "";
    static const char target_charset[] = "UTF-8";

    return iconv_simple(str, target_charset, source_charset);
}
243 
244 #else
245 
/* Variant used when HAVE_ICONV is not defined: no charset conversion is
 * performed; the result is simply what pa_ascii_filter() returns for str.
 * str must not be NULL. */
char* pa_utf8_to_locale (const char *str) {
    char *ascii_only;

    pa_assert(str);

    ascii_only = pa_ascii_filter(str);

    return ascii_only;
}
251 
/* Variant used when HAVE_ICONV is not defined: no charset conversion is
 * performed. If str already passes pa_utf8_valid() a pa_xstrdup() copy of it
 * is returned, otherwise NULL. str must not be NULL. */
char* pa_locale_to_utf8 (const char *str) {
    pa_assert(str);

    /* Without iconv there is nothing we can do for non-UTF-8 input */
    if (!pa_utf8_valid(str))
        return NULL;

    return pa_xstrdup(str);
}
260 
261 #endif
262 
/* Checks that str consists solely of bytes below 128. Returns str itself if
 * so, or NULL as soon as a byte >= 128 is encountered. str must not be
 * NULL; nothing is allocated. */
char *pa_ascii_valid(const char *str) {
    const unsigned char *cursor;

    pa_assert(str);

    cursor = (const unsigned char*) str;
    while (*cursor) {
        if (*cursor > 127)
            return NULL;
        cursor++;
    }

    return (char*) str;
}
273 
/* Returns a pa_xstrdup() copy of str from which every byte >= 128 has been
 * dropped (not replaced); the remaining bytes keep their order. str must not
 * be NULL. */
char *pa_ascii_filter(const char *str) {
    char *result;
    size_t in, out;

    pa_assert(str);

    result = pa_xstrdup(str);

    /* Compact the copy in place: `out` never runs ahead of `in` */
    out = 0;
    for (in = 0; result[in] != '\0'; in++) {
        if ((unsigned char) result[in] >= 128)
            continue;
        result[out++] = result[in];
    }

    result[out] = '\0';

    return result;
}
288