1 /*
2 * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the Computer Systems
16 * Engineering Group at Lawrence Berkeley Laboratory.
17 * 4. Neither the name of the University nor of the Laboratory may be used
18 * to endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 /*
35 * Utilities for message formatting used both by libpcap and rpcapd.
36 */
37
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include "ftmacros.h"
43
44 #include <stddef.h>
45 #include <stdarg.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <errno.h>
49
50 #include "pcap-int.h"
51
52 #include "portability.h"
53
54 #include "fmtutils.h"
55
56 #ifdef _WIN32
57 #include "charconv.h"
58 #endif
59
60 /*
61 * Set the encoding.
62 */
63 #ifdef _WIN32
64 /*
65 * True if we shouold use UTF-8.
66 */
67 static int use_utf_8;
68
69 void
pcap_fmt_set_encoding(unsigned int opts)70 pcap_fmt_set_encoding(unsigned int opts)
71 {
72 if (opts == PCAP_CHAR_ENC_UTF_8)
73 use_utf_8 = 1;
74 }
75 #else
76 void
pcap_fmt_set_encoding(unsigned int opts _U_)77 pcap_fmt_set_encoding(unsigned int opts _U_)
78 {
79 /*
80 * Nothing to do here.
81 */
82 }
83 #endif
84
85 #ifdef _WIN32
/*
 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
 * a buffer starting at the specified location and stopping if we go
 * past the specified size.  This will only put out complete UTF-8
 * sequences.
 *
 * We do this ourselves because Microsoft doesn't offer a "convert and
 * stop at a UTF-8 character boundary if we run out of space" routine.
 *
 * utf_16 is the null-terminated input string; utf_8 is the output
 * buffer and utf_8_len is its size in bytes, including the room needed
 * for the terminating '\0'.
 *
 * Unpaired surrogates are replaced by U+FFFD REPLACEMENT CHARACTER.
 * A leading surrogate followed by a code unit that is not a trailing
 * surrogate consumes both code units and yields a single REPLACEMENT
 * CHARACTER.
 *
 * Returns a pointer to the terminating '\0' in the output buffer, or
 * utf_8 itself, with nothing written, if utf_8_len is 0.
 */
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER	0x0FFFD

static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	uint32_t uc;
	unsigned char seq[4];
	size_t seq_len;

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (c2 == '\0') {
				/*
				 * Oops, string ends with a lead
				 * surrogate.  Try to drop in
				 * a REPLACEMENT CHARACTER, and
				 * don't move the string pointer,
				 * so on the next trip through
				 * the loop we grab the terminating
				 * '\0' and quit.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * OK, we can consume this 2-octet
				 * value.
				 */
				utf_16++;
				if (IS_TRAILING_SURROGATE(c2)) {
					/*
					 * Trailing surrogate.
					 * For a leading/trailing pair
					 * this produces a value between
					 * 0x10000 and 0x10ffff, so it's
					 * always going to be a valid
					 * Unicode code point.
					 */
					uc = SURROGATE_VALUE(c, c2);
				} else {
					/*
					 * Not a trailing surrogate;
					 * try to drop in a
					 * REPLACEMENT CHARACTER.
					 */
					uc = REPLACEMENT_CHARACTER;
				}
			}
		} else if (IS_TRAILING_SURROGATE(c)) {
			/*
			 * Trailing surrogate without a preceding
			 * leading surrogate.  Try to drop in a
			 * REPLACEMENT CHARACTER.
			 */
			uc = REPLACEMENT_CHARACTER;
		} else {
			/*
			 * This is a valid BMP character; drop it in.
			 */
			uc = c;
		}

		/*
		 * Build the UTF-8 sequence in a scratch array first, so
		 * that the space check and the length bookkeeping below
		 * are done once, from the actual sequence length, and
		 * can't disagree with the number of bytes written.
		 */
		if (uc < 0x0080) {
			/* 1 byte. */
			seq[0] = (unsigned char)uc;
			seq_len = 1;
		} else if (uc < 0x0800) {
			/* 2 bytes. */
			seq[0] = (unsigned char)(((uc >>  6) & 0x1F) | 0xC0);
			seq[1] = (unsigned char)(((uc >>  0) & 0x3F) | 0x80);
			seq_len = 2;
		} else if (uc < 0x010000) {
			/* 3 bytes. */
			seq[0] = (unsigned char)(((uc >> 12) & 0x0F) | 0xE0);
			seq[1] = (unsigned char)(((uc >>  6) & 0x3F) | 0x80);
			seq[2] = (unsigned char)(((uc >>  0) & 0x3F) | 0x80);
			seq_len = 3;
		} else {
			/*
			 * 4 bytes.  The lead byte carries 3 payload bits
			 * (mask 0x07); code points from 0x100000 up have
			 * bit 20 set, which a 2-bit mask would drop.
			 */
			seq[0] = (unsigned char)(((uc >> 18) & 0x07) | 0xF0);
			seq[1] = (unsigned char)(((uc >> 12) & 0x3F) | 0x80);
			seq[2] = (unsigned char)(((uc >>  6) & 0x3F) | 0x80);
			seq[3] = (unsigned char)(((uc >>  0) & 0x3F) | 0x80);
			seq_len = 4;
		}

		if (utf_8_len < seq_len + 1) {
			/*
			 * Not enough room for those bytes plus a
			 * trailing '\0'; stop on a character boundary.
			 */
			break;
		}
		memcpy(utf_8, seq, seq_len);
		utf_8 += seq_len;
		utf_8_len -= seq_len;
	}

	/*
	 * OK, we have enough room for (at least) a trailing '\0'.
	 * (We started out with enough room, thanks to the test
	 * for a zero-length buffer at the beginning, and we never
	 * store a character unless there is room for it *plus* a
	 * trailing '\0'.)
	 *
	 * Drop it in.
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case we
	 * want to drop something in after that.
	 */
	return (utf_8);
}
262 #endif /* _WIN32 */
263
/*
 * Build an error message in errbuf (of size errbuflen) from a
 * printf-style format and its arguments, followed by ": " and the
 * text describing the errno value errnum.
 *
 * If, after the formatted text, there isn't room for ": " plus a
 * terminating '\0', only the formatted text is left in the buffer.
 */
void
pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, ...)
{
	va_list args;
	size_t prefix_len;
	char *tail;
	size_t tail_size;

	va_start(args, fmt);
	vsnprintf(errbuf, errbuflen, fmt, args);
	va_end(args);

	/*
	 * The separator needs 2 bytes and the terminating '\0' a third;
	 * without that much room, hand back just the formatted text.
	 */
	prefix_len = strlen(errbuf);
	if (errbuflen < prefix_len + 3) {
		return;
	}

	/*
	 * Append the separator; "tail" ends up pointing at the
	 * terminating '\0', where the errno text will go, and
	 * "tail_size" is the space left from there to the end of
	 * the buffer.
	 */
	tail = errbuf + prefix_len;
	tail[0] = ':';
	tail[1] = ' ';
	tail[2] = '\0';
	tail += 2;
	tail_size = errbuflen - prefix_len - 2;

	/*
	 * Append the description of the error code, using whichever
	 * facility this platform provides.
	 */
#if defined(HAVE__WCSERROR_S)
	/*
	 * Windows-style _wcserror_s(): fetch the message as UTF-16LE.
	 */
	wchar_t wide_msg[PCAP_ERRBUF_SIZE];

	if (_wcserror_s(wide_msg, PCAP_ERRBUF_SIZE, errnum) != 0) {
		/*
		 * The failure modes of _wcserror_s() don't appear to
		 * be documented anywhere obvious; fall back on the
		 * number alone.
		 */
		snprintf(tail, tail_size, "Error %d", errnum);
		return;
	}

	/*
	 * Convert to UTF-8 in the space that's left, truncating
	 * cleanly on a UTF-8 character boundary if it doesn't fit.
	 */
	utf_16le_to_utf_8_truncated(wide_msg, tail, tail_size);

	/*
	 * Outside UTF-8 mode, the whole buffer is converted to the
	 * local code page.
	 */
	if (!use_utf_8)
		utf_8_to_acp_truncated(errbuf);
#elif defined(HAVE_GNU_STRERROR_R)
	/*
	 * GNU-style strerror_r() always succeeds, but it may or may
	 * not use the buffer we hand it; the string to use is
	 * whatever it returns.
	 */
	char scratch[PCAP_ERRBUF_SIZE];

	snprintf(tail, tail_size, "%s",
	    strerror_r(errnum, scratch, PCAP_ERRBUF_SIZE));
#elif defined(HAVE_POSIX_STRERROR_R)
	/*
	 * POSIX-style strerror_r() writes into our buffer but can
	 * fail, and UNIX 03 doesn't promise a fallback message in
	 * that case, so supply our own.
	 */
	switch (strerror_r(errnum, tail, tail_size)) {

	case EINVAL:
		snprintf(tail, tail_size, "Unknown error: %d", errnum);
		break;

	case ERANGE:
		snprintf(tail, tail_size,
		    "Message for error %d is too long", errnum);
		break;

	default:
		break;
	}
#else
	/*
	 * Neither _wcserror_s() nor strerror_r() is available, so
	 * use pcap_strerror().
	 */
	snprintf(tail, tail_size, "%s", pcap_strerror(errnum));
#endif
}
370
371 #ifdef _WIN32
372 /*
373 * Generate an error message based on a format, arguments, and a
374 * Win32 error, with a message for the Win32 error after the formatted output.
375 */
376 void
pcap_fmt_errmsg_for_win32_err(char * errbuf,size_t errbuflen,DWORD errnum,const char * fmt,...)377 pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
378 const char *fmt, ...)
379 {
380 va_list ap;
381 size_t msglen;
382 char *p;
383 size_t errbuflen_remaining;
384 DWORD retval;
385 wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
386 size_t utf_8_len;
387
388 va_start(ap, fmt);
389 vsnprintf(errbuf, errbuflen, fmt, ap);
390 va_end(ap);
391 msglen = strlen(errbuf);
392
393 /*
394 * Do we have enough space to append ": "?
395 * Including the terminating '\0', that's 3 bytes.
396 */
397 if (msglen + 3 > errbuflen) {
398 /* No - just give them what we've produced. */
399 return;
400 }
401 p = errbuf + msglen;
402 errbuflen_remaining = errbuflen - msglen;
403 *p++ = ':';
404 *p++ = ' ';
405 *p = '\0';
406 msglen += 2;
407 errbuflen_remaining -= 2;
408
409 /*
410 * Now append the string for the error code.
411 *
412 * XXX - what language ID to use?
413 *
414 * For UN*Xes, pcap_strerror() may or may not return localized
415 * strings.
416 *
417 * We currently don't have localized messages for libpcap, but
418 * we might want to do so. On the other hand, if most of these
419 * messages are going to be read by libpcap developers and
420 * perhaps by developers of libpcap-based applications, English
421 * might be a better choice, so the developer doesn't have to
422 * get the message translated if it's in a language they don't
423 * happen to understand.
424 */
425 retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
426 NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
427 utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
428 if (retval == 0) {
429 /*
430 * Failed.
431 */
432 snprintf(p, errbuflen_remaining,
433 "Couldn't get error message for error (%lu)", errnum);
434 return;
435 }
436
437 /*
438 * Now convert it from UTF-16LE to UTF-8.
439 */
440 p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
441
442 /*
443 * Now append the error number, if it fits.
444 */
445 utf_8_len = p - errbuf;
446 errbuflen_remaining -= utf_8_len;
447 if (utf_8_len == 0) {
448 /* The message was empty. */
449 snprintf(p, errbuflen_remaining, "(%lu)", errnum);
450 } else
451 snprintf(p, errbuflen_remaining, " (%lu)", errnum);
452
453 /*
454 * Now, if we're not in UTF-8 mode, convert errbuf to the
455 * local code page.
456 */
457 if (!use_utf_8)
458 utf_8_to_acp_truncated(errbuf);
459 }
460 #endif
461