• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*	$NetBSD: chartype.c,v 1.37 2023/08/10 20:38:00 mrg Exp $	*/
2 
3 /*-
4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * chartype.c: character classification and meta information
31  */
32 #include "config.h"
33 #if !defined(lint) && !defined(SCCSID)
34 __RCSID("$NetBSD: chartype.c,v 1.37 2023/08/10 20:38:00 mrg Exp $");
35 #endif /* not lint && not SCCSID */
36 
37 #include <ctype.h>
38 #include <limits.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdint.h>
42 
43 #include "el.h"
44 
45 #define CT_BUFSIZ ((size_t)1024)
46 
47 static int ct_conv_cbuff_resize(ct_buffer_t *, size_t);
48 static int ct_conv_wbuff_resize(ct_buffer_t *, size_t);
49 
50 static int
ct_conv_cbuff_resize(ct_buffer_t * conv,size_t csize)51 ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize)
52 {
53 	void *p;
54 
55 	if (csize <= conv->csize)
56 		return 0;
57 
58 	conv->csize = csize;
59 
60 	p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff));
61 	if (p == NULL) {
62 		conv->csize = 0;
63 		el_free(conv->cbuff);
64 		conv->cbuff = NULL;
65 		return -1;
66 	}
67 	conv->cbuff = p;
68 	return 0;
69 }
70 
71 static int
ct_conv_wbuff_resize(ct_buffer_t * conv,size_t wsize)72 ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize)
73 {
74 	void *p;
75 
76 	if (wsize <= conv->wsize)
77 		return 0;
78 
79 	conv->wsize = wsize;
80 
81 	p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff));
82 	if (p == NULL) {
83 		conv->wsize = 0;
84 		el_free(conv->wbuff);
85 		conv->wbuff = NULL;
86 		return -1;
87 	}
88 	conv->wbuff = p;
89 	return 0;
90 }
91 
92 
93 char *
ct_encode_string(const wchar_t * s,ct_buffer_t * conv)94 ct_encode_string(const wchar_t *s, ct_buffer_t *conv)
95 {
96 	char *dst;
97 	ssize_t used;
98 
99 	if (!s)
100 		return NULL;
101 
102 	dst = conv->cbuff;
103 	for (;;) {
104 		used = (ssize_t)(dst - conv->cbuff);
105 		if ((conv->csize - (size_t)used) < 5) {
106 			if (ct_conv_cbuff_resize(conv,
107 			    conv->csize + CT_BUFSIZ) == -1)
108 				return NULL;
109 			dst = conv->cbuff + used;
110 		}
111 		if (!*s)
112 			break;
113 		used = ct_encode_char(dst, (size_t)5, *s);
114 		if (used == -1) /* failed to encode, need more buffer space */
115 			abort();
116 		++s;
117 		dst += used;
118 	}
119 	*dst = '\0';
120 	return conv->cbuff;
121 }
122 
123 wchar_t *
ct_decode_string(const char * s,ct_buffer_t * conv)124 ct_decode_string(const char *s, ct_buffer_t *conv)
125 {
126 	size_t len;
127 
128 	if (!s)
129 		return NULL;
130 
131 	len = mbstowcs(NULL, s, (size_t)0);
132 	if (len == (size_t)-1)
133 		return NULL;
134 
135 	if (conv->wsize < ++len)
136 		if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1)
137 			return NULL;
138 
139 	mbstowcs(conv->wbuff, s, conv->wsize);
140 	return conv->wbuff;
141 }
142 
143 
144 libedit_private wchar_t **
ct_decode_argv(int argc,const char * argv[],ct_buffer_t * conv)145 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
146 {
147 	size_t bufspace;
148 	int i;
149 	wchar_t *p;
150 	wchar_t **wargv;
151 	ssize_t bytes;
152 
153 	/* Make sure we have enough space in the conversion buffer to store all
154 	 * the argv strings. */
155 	for (i = 0, bufspace = 0; i < argc; ++i)
156 		bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
157 	if (conv->wsize < ++bufspace)
158 		if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1)
159 			return NULL;
160 
161 	wargv = el_calloc((size_t)(argc + 1), sizeof(*wargv));
162 	if (wargv == NULL)
163 		return NULL;
164 
165 	for (i = 0, p = conv->wbuff; i < argc; ++i) {
166 		if (!argv[i]) {   /* don't pass null pointers to mbstowcs */
167 			wargv[i] = NULL;
168 			continue;
169 		} else {
170 			wargv[i] = p;
171 			bytes = (ssize_t)mbstowcs(p, argv[i], bufspace);
172 		}
173 		if (bytes == -1) {
174 			el_free(wargv);
175 			return NULL;
176 		} else
177 			bytes++;  /* include '\0' in the count */
178 		bufspace -= (size_t)bytes;
179 		p += bytes;
180 	}
181 	wargv[i] = NULL;
182 
183 	return wargv;
184 }
185 
186 
187 libedit_private size_t
ct_enc_width(wchar_t c)188 ct_enc_width(wchar_t c)
189 {
190 	mbstate_t mbs;
191 	char buf[MB_LEN_MAX];
192 	size_t size;
193 	memset(&mbs, 0, sizeof(mbs));
194 
195 	if ((size = wcrtomb(buf, c, &mbs)) == (size_t)-1)
196 		return 0;
197 	return size;
198 }
199 
200 libedit_private ssize_t
ct_encode_char(char * dst,size_t len,wchar_t c)201 ct_encode_char(char *dst, size_t len, wchar_t c)
202 {
203 	ssize_t l = 0;
204 	if (len < ct_enc_width(c))
205 		return -1;
206 	l = wctomb(dst, c);
207 
208 	if (l < 0) {
209 		wctomb(NULL, L'\0');
210 		l = 0;
211 	}
212 	return l;
213 }
214 
215 libedit_private const wchar_t *
ct_visual_string(const wchar_t * s,ct_buffer_t * conv)216 ct_visual_string(const wchar_t *s, ct_buffer_t *conv)
217 {
218 	wchar_t *dst;
219 	ssize_t used;
220 
221 	if (!s)
222 		return NULL;
223 
224 	if (ct_conv_wbuff_resize(conv, CT_BUFSIZ) == -1)
225 		return NULL;
226 
227 	used = 0;
228 	dst = conv->wbuff;
229 	while (*s) {
230 		used = ct_visual_char(dst,
231 		    conv->wsize - (size_t)(dst - conv->wbuff), *s);
232 		if (used != -1) {
233 			++s;
234 			dst += used;
235 			continue;
236 		}
237 
238 		/* failed to encode, need more buffer space */
239 		uintptr_t sused = (uintptr_t)dst - (uintptr_t)conv->wbuff;
240 		if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
241 			return NULL;
242 		dst = conv->wbuff + sused;
243 	}
244 
245 	if (dst >= (conv->wbuff + conv->wsize)) { /* sigh */
246 		uintptr_t sused = (uintptr_t)dst - (uintptr_t)conv->wbuff;
247 		if (ct_conv_wbuff_resize(conv, conv->wsize + CT_BUFSIZ) == -1)
248 			return NULL;
249 		dst = conv->wbuff + sused;
250 	}
251 
252 	*dst = L'\0';
253 	return conv->wbuff;
254 }
255 
256 
257 
258 libedit_private int
ct_visual_width(wchar_t c)259 ct_visual_width(wchar_t c)
260 {
261 	int t = ct_chr_class(c);
262 	switch (t) {
263 	case CHTYPE_ASCIICTL:
264 		return 2; /* ^@ ^? etc. */
265 	case CHTYPE_TAB:
266 		return 1; /* Hmm, this really need to be handled outside! */
267 	case CHTYPE_NL:
268 		return 0; /* Should this be 1 instead? */
269 	case CHTYPE_PRINT:
270 		return wcwidth(c);
271 	case CHTYPE_NONPRINT:
272 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
273 			return 8; /* \U+12345 */
274 		else
275 			return 7; /* \U+1234 */
276 	default:
277 		return 0; /* should not happen */
278 	}
279 }
280 
281 
282 libedit_private ssize_t
ct_visual_char(wchar_t * dst,size_t len,wchar_t c)283 ct_visual_char(wchar_t *dst, size_t len, wchar_t c)
284 {
285 	int t = ct_chr_class(c);
286 	switch (t) {
287 	case CHTYPE_TAB:
288 	case CHTYPE_NL:
289 	case CHTYPE_ASCIICTL:
290 		if (len < 2)
291 			return -1;   /* insufficient space */
292 		*dst++ = '^';
293 		if (c == '\177')
294 			*dst = '?'; /* DEL -> ^? */
295 		else
296 			*dst = c | 0100;    /* uncontrolify it */
297 		return 2;
298 	case CHTYPE_PRINT:
299 		if (len < 1)
300 			return -1;  /* insufficient space */
301 		*dst = c;
302 		return 1;
303 	case CHTYPE_NONPRINT:
304 		/* we only use single-width glyphs for display,
305 		 * so this is right */
306 		if ((ssize_t)len < ct_visual_width(c))
307 			return -1;   /* insufficient space */
308 		*dst++ = '\\';
309 		*dst++ = 'U';
310 		*dst++ = '+';
311 #define tohexdigit(v) "0123456789ABCDEF"[v]
312 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
313 			*dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
314 		*dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
315 		*dst++ = tohexdigit(((unsigned int) c >>  8) & 0xf);
316 		*dst++ = tohexdigit(((unsigned int) c >>  4) & 0xf);
317 		*dst   = tohexdigit(((unsigned int) c      ) & 0xf);
318 		return c > 0xffff ? 8 : 7;
319 		/*FALLTHROUGH*/
320 	/* these two should be handled outside this function */
321 	default:            /* we should never hit the default */
322 		return 0;
323 	}
324 }
325 
326 
327 
328 
329 libedit_private int
ct_chr_class(wchar_t c)330 ct_chr_class(wchar_t c)
331 {
332 	if (c == '\t')
333 		return CHTYPE_TAB;
334 	else if (c == '\n')
335 		return CHTYPE_NL;
336 	else if (c < 0x100 && iswcntrl(c))
337 		return CHTYPE_ASCIICTL;
338 	else if (iswprint(c))
339 		return CHTYPE_PRINT;
340 	else
341 		return CHTYPE_NONPRINT;
342 }
343