1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /*
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
23 */
24
25 /*
26 * MT safe
27 */
28
29 #include "config.h"
30
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <locale.h>
35 #include <string.h>
36 #include <locale.h>
37 #include <errno.h>
38 #include <ctype.h> /* For tolower() */
39
40 #ifdef HAVE_XLOCALE_H
41 /* Needed on BSD/OS X for e.g. strtod_l */
42 #include <xlocale.h>
43 #endif
44
45 #ifdef G_OS_WIN32
46 #include <windows.h>
47 #endif
48
49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
50
51 #include "gstrfuncs.h"
52
53 #include "gprintf.h"
54 #include "gprintfint.h"
55 #include "glibintl.h"
56
57
58 /**
59 * SECTION:string_utils
60 * @title: String Utility Functions
61 * @short_description: various string-related functions
62 *
63 * This section describes a number of utility functions for creating,
64 * duplicating, and manipulating strings.
65 *
66 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
67 * g_vprintf(), g_vfprintf(), g_vsprintf() and g_vasprintf()
68 * are declared in the header `gprintf.h` which is not included in `glib.h`
69 * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
70 * explicitly include `<glib/gprintf.h>` in order to use the GLib
71 * printf() functions.
72 *
73 * ## String precision pitfalls # {#string-precision}
74 *
75 * While you may use the printf() functions to format UTF-8 strings,
76 * notice that the precision of a \%Ns parameter is interpreted
77 * as the number of bytes, not characters to print. On top of that,
78 * the GNU libc implementation of the printf() functions has the
79 * "feature" that it checks that the string given for the \%Ns
80 * parameter consists of a whole number of characters in the current
81 * encoding. So, unless you are sure you are always going to be in a
82 * UTF-8 locale or you know your text is restricted to ASCII, avoid
83 * using \%Ns. If your intention is to format strings for a
84 * certain number of columns, then \%Ns is not a correct solution
85 * anyway, since it fails to take wide characters (see g_unichar_iswide())
86 * into account.
87 *
88 * Note also that there are various printf() parameters which are platform
89 * dependent. GLib provides platform independent macros for these parameters
90 * which should be used instead. A common example is %G_GUINT64_FORMAT, which
91 * should be used instead of `%llu` or similar parameters for formatting
92 * 64-bit integers. These macros are all named `G_*_FORMAT`; see
93 * [Basic Types][glib-Basic-Types].
94 */
95
96 /**
97 * g_ascii_isalnum:
98 * @c: any character
99 *
100 * Determines whether a character is alphanumeric.
101 *
102 * Unlike the standard C library isalnum() function, this only
103 * recognizes standard ASCII letters and ignores the locale,
104 * returning %FALSE for all non-ASCII characters. Also, unlike
105 * the standard library function, this takes a char, not an int,
106 * so don't call it on %EOF, but no need to cast to #guchar before
107 * passing a possibly non-ASCII character in.
108 *
109 * Returns: %TRUE if @c is an ASCII alphanumeric character
110 */
111
112 /**
113 * g_ascii_isalpha:
114 * @c: any character
115 *
116 * Determines whether a character is alphabetic (i.e. a letter).
117 *
118 * Unlike the standard C library isalpha() function, this only
119 * recognizes standard ASCII letters and ignores the locale,
120 * returning %FALSE for all non-ASCII characters. Also, unlike
121 * the standard library function, this takes a char, not an int,
122 * so don't call it on %EOF, but no need to cast to #guchar before
123 * passing a possibly non-ASCII character in.
124 *
125 * Returns: %TRUE if @c is an ASCII alphabetic character
126 */
127
128 /**
129 * g_ascii_iscntrl:
130 * @c: any character
131 *
132 * Determines whether a character is a control character.
133 *
134 * Unlike the standard C library iscntrl() function, this only
135 * recognizes standard ASCII control characters and ignores the
136 * locale, returning %FALSE for all non-ASCII characters. Also,
137 * unlike the standard library function, this takes a char, not
138 * an int, so don't call it on %EOF, but no need to cast to #guchar
139 * before passing a possibly non-ASCII character in.
140 *
141 * Returns: %TRUE if @c is an ASCII control character.
142 */
143
144 /**
145 * g_ascii_isdigit:
146 * @c: any character
147 *
148 * Determines whether a character is a digit (0-9).
149 *
150 * Unlike the standard C library isdigit() function, this takes
151 * a char, not an int, so don't call it on %EOF, but no need to
152 * cast to #guchar before passing a possibly non-ASCII character in.
153 *
154 * Returns: %TRUE if @c is an ASCII digit.
155 */
156
157 /**
158 * g_ascii_isgraph:
159 * @c: any character
160 *
161 * Determines whether a character is a printing character and not a space.
162 *
163 * Unlike the standard C library isgraph() function, this only
164 * recognizes standard ASCII characters and ignores the locale,
165 * returning %FALSE for all non-ASCII characters. Also, unlike
166 * the standard library function, this takes a char, not an int,
167 * so don't call it on %EOF, but no need to cast to #guchar before
168 * passing a possibly non-ASCII character in.
169 *
170 * Returns: %TRUE if @c is an ASCII printing character other than space.
171 */
172
173 /**
174 * g_ascii_islower:
175 * @c: any character
176 *
177 * Determines whether a character is an ASCII lower case letter.
178 *
179 * Unlike the standard C library islower() function, this only
180 * recognizes standard ASCII letters and ignores the locale,
181 * returning %FALSE for all non-ASCII characters. Also, unlike
182 * the standard library function, this takes a char, not an int,
183 * so don't call it on %EOF, but no need to worry about casting
184 * to #guchar before passing a possibly non-ASCII character in.
185 *
186 * Returns: %TRUE if @c is an ASCII lower case letter
187 */
188
189 /**
190 * g_ascii_isprint:
191 * @c: any character
192 *
193 * Determines whether a character is a printing character.
194 *
195 * Unlike the standard C library isprint() function, this only
196 * recognizes standard ASCII characters and ignores the locale,
197 * returning %FALSE for all non-ASCII characters. Also, unlike
198 * the standard library function, this takes a char, not an int,
199 * so don't call it on %EOF, but no need to cast to #guchar before
200 * passing a possibly non-ASCII character in.
201 *
202 * Returns: %TRUE if @c is an ASCII printing character.
203 */
204
205 /**
206 * g_ascii_ispunct:
207 * @c: any character
208 *
209 * Determines whether a character is a punctuation character.
210 *
211 * Unlike the standard C library ispunct() function, this only
212 * recognizes standard ASCII punctuation and ignores the locale,
213 * returning %FALSE for all non-ASCII characters. Also, unlike
214 * the standard library function, this takes a char, not an int,
215 * so don't call it on %EOF, but no need to cast to #guchar before
216 * passing a possibly non-ASCII character in.
217 *
218 * Returns: %TRUE if @c is an ASCII punctuation character.
219 */
220
221 /**
222 * g_ascii_isspace:
223 * @c: any character
224 *
225 * Determines whether a character is a white-space character.
226 *
227 * Unlike the standard C library isspace() function, this only
228 * recognizes standard ASCII white-space and ignores the locale,
229 * returning %FALSE for all non-ASCII characters. Also, unlike
230 * the standard library function, this takes a char, not an int,
231 * so don't call it on %EOF, but no need to cast to #guchar before
232 * passing a possibly non-ASCII character in.
233 *
234 * Returns: %TRUE if @c is an ASCII white-space character
235 */
236
237 /**
238 * g_ascii_isupper:
239 * @c: any character
240 *
241 * Determines whether a character is an ASCII upper case letter.
242 *
243 * Unlike the standard C library isupper() function, this only
244 * recognizes standard ASCII letters and ignores the locale,
245 * returning %FALSE for all non-ASCII characters. Also, unlike
246 * the standard library function, this takes a char, not an int,
247 * so don't call it on %EOF, but no need to worry about casting
248 * to #guchar before passing a possibly non-ASCII character in.
249 *
250 * Returns: %TRUE if @c is an ASCII upper case letter
251 */
252
253 /**
254 * g_ascii_isxdigit:
255 * @c: any character
256 *
257 * Determines whether a character is a hexadecimal-digit character.
258 *
259 * Unlike the standard C library isxdigit() function, this takes
260 * a char, not an int, so don't call it on %EOF, but no need to
261 * cast to #guchar before passing a possibly non-ASCII character in.
262 *
263 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
264 */
265
266 /**
267 * G_ASCII_DTOSTR_BUF_SIZE:
268 *
269 * A good size for a buffer to be passed into g_ascii_dtostr().
270 * It is guaranteed to be enough for all output of that function
271 * on systems with 64bit IEEE-compatible doubles.
272 *
273 * The typical usage would be something like:
274 * |[<!-- language="C" -->
275 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
276 *
277 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
278 * ]|
279 */
280
281 /**
282 * g_strstrip:
283 * @string: a string to remove the leading and trailing whitespace from
284 *
285 * Removes leading and trailing whitespace from a string.
286 * See g_strchomp() and g_strchug().
287 *
288 * Returns: @string
289 */
290
291 /**
292 * G_STR_DELIMITERS:
293 *
294 * The standard delimiters, used in g_strdelimit().
295 */
296
297 static const guint16 ascii_table_data[256] = {
298 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
299 0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
300 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
301 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
302 0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
303 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
304 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
305 0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
306 0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
307 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
308 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
309 0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
310 0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
311 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
312 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
313 0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
314 /* the upper 128 are all zeroes */
315 };
316
317 const guint16 * const g_ascii_table = ascii_table_data;
318
/* The locale_t based code paths in this file are enabled only when all of
 * newlocale(), uselocale(), strtod_l(), strtoull_l() and strtoll_l() are
 * reported as available through their HAVE_* macros.
 */
#if defined (HAVE_NEWLOCALE) && defined (HAVE_USELOCALE) && \
    defined (HAVE_STRTOD_L) && defined (HAVE_STRTOULL_L) && \
    defined (HAVE_STRTOLL_L)
#define USE_XLOCALE 1
#endif

#ifdef USE_XLOCALE
/* Returns a locale object for the "C" locale.
 *
 * The object is created by the first caller, guarded by
 * g_once_init_enter()/g_once_init_leave(), and the same cached handle is
 * handed back on every later call.  It is never freed here.
 */
static locale_t
get_C_locale (void)
{
  static gsize created = FALSE;
  static locale_t c_locale = NULL;

  if (!g_once_init_enter (&created))
    return c_locale;

  c_locale = newlocale (LC_ALL_MASK, "C", NULL);
  g_once_init_leave (&created, TRUE);

  return c_locale;
}
#endif
343
344 /**
345 * g_strdup:
346 * @str: (nullable): the string to duplicate
347 *
348 * Duplicates a string. If @str is %NULL it returns %NULL.
349 * The returned string should be freed with g_free()
350 * when no longer needed.
351 *
352 * Returns: a newly-allocated copy of @str
353 */
354 gchar*
g_strdup(const gchar * str)355 g_strdup (const gchar *str)
356 {
357 gchar *new_str;
358 gsize length;
359
360 if (str)
361 {
362 length = strlen (str) + 1;
363 new_str = g_new (char, length);
364 memcpy (new_str, str, length);
365 }
366 else
367 new_str = NULL;
368
369 return new_str;
370 }
371
372 /**
373 * g_memdup:
374 * @mem: the memory to copy.
375 * @byte_size: the number of bytes to copy.
376 *
377 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
378 * from @mem. If @mem is %NULL it returns %NULL.
379 *
380 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
381 * is %NULL.
382 */
383 gpointer
g_memdup(gconstpointer mem,guint byte_size)384 g_memdup (gconstpointer mem,
385 guint byte_size)
386 {
387 gpointer new_mem;
388
389 if (mem && byte_size != 0)
390 {
391 new_mem = g_malloc (byte_size);
392 memcpy (new_mem, mem, byte_size);
393 }
394 else
395 new_mem = NULL;
396
397 return new_mem;
398 }
399
400 /**
401 * g_memdup2:
402 * @mem: (nullable): the memory to copy.
403 * @byte_size: the number of bytes to copy.
404 *
405 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
406 * from @mem. If @mem is %NULL it returns %NULL.
407 *
408 * This replaces g_memdup(), which was prone to integer overflows when
409 * converting the argument from a #gsize to a #guint.
410 *
411 * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
412 * or %NULL if @mem is %NULL.
413 * Since: 2.68
414 */
415 gpointer
g_memdup2(gconstpointer mem,gsize byte_size)416 g_memdup2 (gconstpointer mem,
417 gsize byte_size)
418 {
419 gpointer new_mem;
420
421 if (mem && byte_size != 0)
422 {
423 new_mem = g_malloc (byte_size);
424 memcpy (new_mem, mem, byte_size);
425 }
426 else
427 new_mem = NULL;
428
429 return new_mem;
430 }
431
432 /**
433 * g_strndup:
434 * @str: the string to duplicate
435 * @n: the maximum number of bytes to copy from @str
436 *
437 * Duplicates the first @n bytes of a string, returning a newly-allocated
438 * buffer @n + 1 bytes long which will always be nul-terminated. If @str
439 * is less than @n bytes long the buffer is padded with nuls. If @str is
440 * %NULL it returns %NULL. The returned value should be freed when no longer
441 * needed.
442 *
443 * To copy a number of characters from a UTF-8 encoded string,
444 * use g_utf8_strncpy() instead.
445 *
446 * Returns: a newly-allocated buffer containing the first @n bytes
447 * of @str, nul-terminated
448 */
449 gchar*
g_strndup(const gchar * str,gsize n)450 g_strndup (const gchar *str,
451 gsize n)
452 {
453 gchar *new_str;
454
455 if (str)
456 {
457 new_str = g_new (gchar, n + 1);
458 strncpy (new_str, str, n);
459 new_str[n] = '\0';
460 }
461 else
462 new_str = NULL;
463
464 return new_str;
465 }
466
467 /**
468 * g_strnfill:
469 * @length: the length of the new string
470 * @fill_char: the byte to fill the string with
471 *
472 * Creates a new string @length bytes long filled with @fill_char.
473 * The returned string should be freed when no longer needed.
474 *
475 * Returns: a newly-allocated string filled the @fill_char
476 */
477 gchar*
g_strnfill(gsize length,gchar fill_char)478 g_strnfill (gsize length,
479 gchar fill_char)
480 {
481 gchar *str;
482
483 str = g_new (gchar, length + 1);
484 memset (str, (guchar)fill_char, length);
485 str[length] = '\0';
486
487 return str;
488 }
489
490 /**
491 * g_stpcpy:
492 * @dest: destination buffer.
493 * @src: source string.
494 *
495 * Copies a nul-terminated string into the dest buffer, include the
496 * trailing nul, and return a pointer to the trailing nul byte.
497 * This is useful for concatenating multiple strings together
498 * without having to repeatedly scan for the end.
499 *
500 * Returns: a pointer to trailing nul byte.
501 **/
502 gchar *
g_stpcpy(gchar * dest,const gchar * src)503 g_stpcpy (gchar *dest,
504 const gchar *src)
505 {
506 #ifdef HAVE_STPCPY
507 g_return_val_if_fail (dest != NULL, NULL);
508 g_return_val_if_fail (src != NULL, NULL);
509 return stpcpy (dest, src);
510 #else
511 gchar *d = dest;
512 const gchar *s = src;
513
514 g_return_val_if_fail (dest != NULL, NULL);
515 g_return_val_if_fail (src != NULL, NULL);
516 do
517 *d++ = *s;
518 while (*s++ != '\0');
519
520 return d - 1;
521 #endif
522 }
523
524 /**
525 * g_strdup_vprintf:
526 * @format: a standard printf() format string, but notice
527 * [string precision pitfalls][string-precision]
528 * @args: the list of parameters to insert into the format string
529 *
530 * Similar to the standard C vsprintf() function but safer, since it
531 * calculates the maximum space required and allocates memory to hold
532 * the result. The returned string should be freed with g_free() when
533 * no longer needed.
534 *
535 * See also g_vasprintf(), which offers the same functionality, but
536 * additionally returns the length of the allocated string.
537 *
538 * Returns: a newly-allocated string holding the result
539 */
540 gchar*
g_strdup_vprintf(const gchar * format,va_list args)541 g_strdup_vprintf (const gchar *format,
542 va_list args)
543 {
544 gchar *string = NULL;
545
546 g_vasprintf (&string, format, args);
547
548 return string;
549 }
550
551 /**
552 * g_strdup_printf:
553 * @format: a standard printf() format string, but notice
554 * [string precision pitfalls][string-precision]
555 * @...: the parameters to insert into the format string
556 *
557 * Similar to the standard C sprintf() function but safer, since it
558 * calculates the maximum space required and allocates memory to hold
559 * the result. The returned string should be freed with g_free() when no
560 * longer needed.
561 *
562 * Returns: a newly-allocated string holding the result
563 */
564 gchar*
g_strdup_printf(const gchar * format,...)565 g_strdup_printf (const gchar *format,
566 ...)
567 {
568 gchar *buffer;
569 va_list args;
570
571 va_start (args, format);
572 buffer = g_strdup_vprintf (format, args);
573 va_end (args);
574
575 return buffer;
576 }
577
578 /**
579 * g_strconcat:
580 * @string1: the first string to add, which must not be %NULL
581 * @...: a %NULL-terminated list of strings to append to the string
582 *
583 * Concatenates all of the given strings into one long string. The
584 * returned string should be freed with g_free() when no longer needed.
585 *
586 * The variable argument list must end with %NULL. If you forget the %NULL,
587 * g_strconcat() will start appending random memory junk to your string.
588 *
589 * Note that this function is usually not the right function to use to
590 * assemble a translated message from pieces, since proper translation
591 * often requires the pieces to be reordered.
592 *
593 * Returns: a newly-allocated string containing all the string arguments
594 */
595 gchar*
g_strconcat(const gchar * string1,...)596 g_strconcat (const gchar *string1, ...)
597 {
598 gsize l;
599 va_list args;
600 gchar *s;
601 gchar *concat;
602 gchar *ptr;
603
604 if (!string1)
605 return NULL;
606
607 l = 1 + strlen (string1);
608 va_start (args, string1);
609 s = va_arg (args, gchar*);
610 while (s)
611 {
612 l += strlen (s);
613 s = va_arg (args, gchar*);
614 }
615 va_end (args);
616
617 concat = g_new (gchar, l);
618 ptr = concat;
619
620 ptr = g_stpcpy (ptr, string1);
621 va_start (args, string1);
622 s = va_arg (args, gchar*);
623 while (s)
624 {
625 ptr = g_stpcpy (ptr, s);
626 s = va_arg (args, gchar*);
627 }
628 va_end (args);
629
630 return concat;
631 }
632
633 /**
634 * g_strtod:
635 * @nptr: the string to convert to a numeric value.
636 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
637 * character after the last character used in the conversion.
638 *
639 * Converts a string to a #gdouble value.
640 * It calls the standard strtod() function to handle the conversion, but
641 * if the string is not completely converted it attempts the conversion
642 * again with g_ascii_strtod(), and returns the best match.
643 *
644 * This function should seldom be used. The normal situation when reading
645 * numbers not for human consumption is to use g_ascii_strtod(). Only when
646 * you know that you must expect both locale formatted and C formatted numbers
647 * should you use this. Make sure that you don't pass strings such as comma
648 * separated lists of values, since the commas may be interpreted as a decimal
649 * point in some locales, causing unexpected results.
650 *
651 * Returns: the #gdouble value.
652 **/
653 gdouble
g_strtod(const gchar * nptr,gchar ** endptr)654 g_strtod (const gchar *nptr,
655 gchar **endptr)
656 {
657 gchar *fail_pos_1;
658 gchar *fail_pos_2;
659 gdouble val_1;
660 gdouble val_2 = 0;
661
662 g_return_val_if_fail (nptr != NULL, 0);
663
664 fail_pos_1 = NULL;
665 fail_pos_2 = NULL;
666
667 val_1 = strtod (nptr, &fail_pos_1);
668
669 if (fail_pos_1 && fail_pos_1[0] != 0)
670 val_2 = g_ascii_strtod (nptr, &fail_pos_2);
671
672 if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
673 {
674 if (endptr)
675 *endptr = fail_pos_1;
676 return val_1;
677 }
678 else
679 {
680 if (endptr)
681 *endptr = fail_pos_2;
682 return val_2;
683 }
684 }
685
686 /**
687 * g_ascii_strtod:
688 * @nptr: the string to convert to a numeric value.
689 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
690 * character after the last character used in the conversion.
691 *
692 * Converts a string to a #gdouble value.
693 *
694 * This function behaves like the standard strtod() function
695 * does in the C locale. It does this without actually changing
696 * the current locale, since that would not be thread-safe.
697 * A limitation of the implementation is that this function
698 * will still accept localized versions of infinities and NANs.
699 *
700 * This function is typically used when reading configuration
701 * files or other non-user input that should be locale independent.
702 * To handle input from the user you should normally use the
703 * locale-sensitive system strtod() function.
704 *
705 * To convert from a #gdouble to a string in a locale-insensitive
706 * way, use g_ascii_dtostr().
707 *
708 * If the correct value would cause overflow, plus or minus %HUGE_VAL
709 * is returned (according to the sign of the value), and %ERANGE is
710 * stored in %errno. If the correct value would cause underflow,
711 * zero is returned and %ERANGE is stored in %errno.
712 *
713 * This function resets %errno before calling strtod() so that
714 * you can reliably detect overflow and underflow.
715 *
716 * Returns: the #gdouble value.
717 */
718 gdouble
g_ascii_strtod(const gchar * nptr,gchar ** endptr)719 g_ascii_strtod (const gchar *nptr,
720 gchar **endptr)
721 {
722 #ifdef USE_XLOCALE
723
724 g_return_val_if_fail (nptr != NULL, 0);
725
726 errno = 0;
727
728 return strtod_l (nptr, endptr, get_C_locale ());
729
730 #else
731
732 gchar *fail_pos;
733 gdouble val;
734 #ifndef __BIONIC__
735 struct lconv *locale_data;
736 #endif
737 const char *decimal_point;
738 gsize decimal_point_len;
739 const char *p, *decimal_point_pos;
740 const char *end = NULL; /* Silence gcc */
741 int strtod_errno;
742
743 g_return_val_if_fail (nptr != NULL, 0);
744
745 fail_pos = NULL;
746
747 #ifndef __BIONIC__
748 locale_data = localeconv ();
749 decimal_point = locale_data->decimal_point;
750 decimal_point_len = strlen (decimal_point);
751 #else
752 decimal_point = ".";
753 decimal_point_len = 1;
754 #endif
755
756 g_assert (decimal_point_len != 0);
757
758 decimal_point_pos = NULL;
759 end = NULL;
760
761 if (decimal_point[0] != '.' ||
762 decimal_point[1] != 0)
763 {
764 p = nptr;
765 /* Skip leading space */
766 while (g_ascii_isspace (*p))
767 p++;
768
769 /* Skip leading optional sign */
770 if (*p == '+' || *p == '-')
771 p++;
772
773 if (p[0] == '0' &&
774 (p[1] == 'x' || p[1] == 'X'))
775 {
776 p += 2;
777 /* HEX - find the (optional) decimal point */
778
779 while (g_ascii_isxdigit (*p))
780 p++;
781
782 if (*p == '.')
783 decimal_point_pos = p++;
784
785 while (g_ascii_isxdigit (*p))
786 p++;
787
788 if (*p == 'p' || *p == 'P')
789 p++;
790 if (*p == '+' || *p == '-')
791 p++;
792 while (g_ascii_isdigit (*p))
793 p++;
794
795 end = p;
796 }
797 else if (g_ascii_isdigit (*p) || *p == '.')
798 {
799 while (g_ascii_isdigit (*p))
800 p++;
801
802 if (*p == '.')
803 decimal_point_pos = p++;
804
805 while (g_ascii_isdigit (*p))
806 p++;
807
808 if (*p == 'e' || *p == 'E')
809 p++;
810 if (*p == '+' || *p == '-')
811 p++;
812 while (g_ascii_isdigit (*p))
813 p++;
814
815 end = p;
816 }
817 /* For the other cases, we need not convert the decimal point */
818 }
819
820 if (decimal_point_pos)
821 {
822 char *copy, *c;
823
824 /* We need to convert the '.' to the locale specific decimal point */
825 copy = g_malloc (end - nptr + 1 + decimal_point_len);
826
827 c = copy;
828 memcpy (c, nptr, decimal_point_pos - nptr);
829 c += decimal_point_pos - nptr;
830 memcpy (c, decimal_point, decimal_point_len);
831 c += decimal_point_len;
832 memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
833 c += end - (decimal_point_pos + 1);
834 *c = 0;
835
836 errno = 0;
837 val = strtod (copy, &fail_pos);
838 strtod_errno = errno;
839
840 if (fail_pos)
841 {
842 if (fail_pos - copy > decimal_point_pos - nptr)
843 fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
844 else
845 fail_pos = (char *)nptr + (fail_pos - copy);
846 }
847
848 g_free (copy);
849
850 }
851 else if (end)
852 {
853 char *copy;
854
855 copy = g_malloc (end - (char *)nptr + 1);
856 memcpy (copy, nptr, end - nptr);
857 *(copy + (end - (char *)nptr)) = 0;
858
859 errno = 0;
860 val = strtod (copy, &fail_pos);
861 strtod_errno = errno;
862
863 if (fail_pos)
864 {
865 fail_pos = (char *)nptr + (fail_pos - copy);
866 }
867
868 g_free (copy);
869 }
870 else
871 {
872 errno = 0;
873 val = strtod (nptr, &fail_pos);
874 strtod_errno = errno;
875 }
876
877 if (endptr)
878 *endptr = fail_pos;
879
880 errno = strtod_errno;
881
882 return val;
883 #endif
884 }
885
886
887 /**
888 * g_ascii_dtostr:
889 * @buffer: A buffer to place the resulting string in
890 * @buf_len: The length of the buffer.
891 * @d: The #gdouble to convert
892 *
893 * Converts a #gdouble to a string, using the '.' as
894 * decimal point.
895 *
896 * This function generates enough precision that converting
897 * the string back using g_ascii_strtod() gives the same machine-number
898 * (on machines with IEEE compatible 64bit doubles). It is
899 * guaranteed that the size of the resulting string will never
900 * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
901 * nul character, which is always added.
902 *
903 * Returns: The pointer to the buffer with the converted string.
904 **/
905 gchar *
g_ascii_dtostr(gchar * buffer,gint buf_len,gdouble d)906 g_ascii_dtostr (gchar *buffer,
907 gint buf_len,
908 gdouble d)
909 {
910 return g_ascii_formatd (buffer, buf_len, "%.17g", d);
911 }
912
913 #pragma GCC diagnostic push
914 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
915
916 /**
917 * g_ascii_formatd:
918 * @buffer: A buffer to place the resulting string in
919 * @buf_len: The length of the buffer.
920 * @format: The printf()-style format to use for the
921 * code to use for converting.
922 * @d: The #gdouble to convert
923 *
924 * Converts a #gdouble to a string, using the '.' as
925 * decimal point. To format the number you pass in
926 * a printf()-style format string. Allowed conversion
927 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
928 *
929 * The returned buffer is guaranteed to be nul-terminated.
930 *
931 * If you just want to want to serialize the value into a
932 * string, use g_ascii_dtostr().
933 *
934 * Returns: The pointer to the buffer with the converted string.
935 */
936 gchar *
g_ascii_formatd(gchar * buffer,gint buf_len,const gchar * format,gdouble d)937 g_ascii_formatd (gchar *buffer,
938 gint buf_len,
939 const gchar *format,
940 gdouble d)
941 {
942 #ifdef USE_XLOCALE
943 locale_t old_locale;
944
945 old_locale = uselocale (get_C_locale ());
946 _g_snprintf (buffer, buf_len, format, d);
947 uselocale (old_locale);
948
949 return buffer;
950 #else
951 #ifndef __BIONIC__
952 struct lconv *locale_data;
953 #endif
954 const char *decimal_point;
955 gsize decimal_point_len;
956 gchar *p;
957 int rest_len;
958 gchar format_char;
959
960 g_return_val_if_fail (buffer != NULL, NULL);
961 g_return_val_if_fail (format[0] == '%', NULL);
962 g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
963
964 format_char = format[strlen (format) - 1];
965
966 g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
967 format_char == 'f' || format_char == 'F' ||
968 format_char == 'g' || format_char == 'G',
969 NULL);
970
971 if (format[0] != '%')
972 return NULL;
973
974 if (strpbrk (format + 1, "'l%"))
975 return NULL;
976
977 if (!(format_char == 'e' || format_char == 'E' ||
978 format_char == 'f' || format_char == 'F' ||
979 format_char == 'g' || format_char == 'G'))
980 return NULL;
981
982 _g_snprintf (buffer, buf_len, format, d);
983
984 #ifndef __BIONIC__
985 locale_data = localeconv ();
986 decimal_point = locale_data->decimal_point;
987 decimal_point_len = strlen (decimal_point);
988 #else
989 decimal_point = ".";
990 decimal_point_len = 1;
991 #endif
992
993 g_assert (decimal_point_len != 0);
994
995 if (decimal_point[0] != '.' ||
996 decimal_point[1] != 0)
997 {
998 p = buffer;
999
1000 while (g_ascii_isspace (*p))
1001 p++;
1002
1003 if (*p == '+' || *p == '-')
1004 p++;
1005
1006 while (isdigit ((guchar)*p))
1007 p++;
1008
1009 if (strncmp (p, decimal_point, decimal_point_len) == 0)
1010 {
1011 *p = '.';
1012 p++;
1013 if (decimal_point_len > 1)
1014 {
1015 rest_len = strlen (p + (decimal_point_len - 1));
1016 memmove (p, p + (decimal_point_len - 1), rest_len);
1017 p[rest_len] = 0;
1018 }
1019 }
1020 }
1021
1022 return buffer;
1023 #endif
1024 }
1025 #pragma GCC diagnostic pop
1026
/* Character classification and case conversion limited to the literal
 * characters tested below; none of these consult the locale.
 * Every macro may evaluate its argument more than once, so do not pass
 * expressions with side effects.
 */
#define ISSPACE(c)  ((c) == ' ' || (c) == '\f' || (c) == '\n' || \
                     (c) == '\r' || (c) == '\t' || (c) == '\v')
#define ISUPPER(c)  ('A' <= (c) && (c) <= 'Z')
#define ISLOWER(c)  ('a' <= (c) && (c) <= 'z')
#define ISALPHA(c)  (ISUPPER (c) || ISLOWER (c))
#define TOUPPER(c)  (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
#define TOLOWER(c)  (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
1034
1035 #ifndef USE_XLOCALE
1036
1037 static guint64
g_parse_long_long(const gchar * nptr,const gchar ** endptr,guint base,gboolean * negative)1038 g_parse_long_long (const gchar *nptr,
1039 const gchar **endptr,
1040 guint base,
1041 gboolean *negative)
1042 {
1043 /* this code is based on on the strtol(3) code from GNU libc released under
1044 * the GNU Lesser General Public License.
1045 *
1046 * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1047 * Free Software Foundation, Inc.
1048 */
1049 gboolean overflow;
1050 guint64 cutoff;
1051 guint64 cutlim;
1052 guint64 ui64;
1053 const gchar *s, *save;
1054 guchar c;
1055
1056 g_return_val_if_fail (nptr != NULL, 0);
1057
1058 *negative = FALSE;
1059 if (base == 1 || base > 36)
1060 {
1061 errno = EINVAL;
1062 if (endptr)
1063 *endptr = nptr;
1064 return 0;
1065 }
1066
1067 save = s = nptr;
1068
1069 /* Skip white space. */
1070 while (ISSPACE (*s))
1071 ++s;
1072
1073 if (G_UNLIKELY (!*s))
1074 goto noconv;
1075
1076 /* Check for a sign. */
1077 if (*s == '-')
1078 {
1079 *negative = TRUE;
1080 ++s;
1081 }
1082 else if (*s == '+')
1083 ++s;
1084
1085 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
1086 if (*s == '0')
1087 {
1088 if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1089 {
1090 s += 2;
1091 base = 16;
1092 }
1093 else if (base == 0)
1094 base = 8;
1095 }
1096 else if (base == 0)
1097 base = 10;
1098
1099 /* Save the pointer so we can check later if anything happened. */
1100 save = s;
1101 cutoff = G_MAXUINT64 / base;
1102 cutlim = G_MAXUINT64 % base;
1103
1104 overflow = FALSE;
1105 ui64 = 0;
1106 c = *s;
1107 for (; c; c = *++s)
1108 {
1109 if (c >= '0' && c <= '9')
1110 c -= '0';
1111 else if (ISALPHA (c))
1112 c = TOUPPER (c) - 'A' + 10;
1113 else
1114 break;
1115 if (c >= base)
1116 break;
1117 /* Check for overflow. */
1118 if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1119 overflow = TRUE;
1120 else
1121 {
1122 ui64 *= base;
1123 ui64 += c;
1124 }
1125 }
1126
1127 /* Check if anything actually happened. */
1128 if (s == save)
1129 goto noconv;
1130
1131 /* Store in ENDPTR the address of one character
1132 past the last character we converted. */
1133 if (endptr)
1134 *endptr = s;
1135
1136 if (G_UNLIKELY (overflow))
1137 {
1138 errno = ERANGE;
1139 return G_MAXUINT64;
1140 }
1141
1142 return ui64;
1143
1144 noconv:
1145 /* We must handle a special case here: the base is 0 or 16 and the
1146 first two characters are '0' and 'x', but the rest are no
1147 hexadecimal digits. This is no error case. We return 0 and
1148 ENDPTR points to the `x`. */
1149 if (endptr)
1150 {
1151 if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1152 && save[-2] == '0')
1153 *endptr = &save[-1];
1154 else
1155 /* There was no number to convert. */
1156 *endptr = nptr;
1157 }
1158 return 0;
1159 }
1160 #endif /* !USE_XLOCALE */
1161
1162 /**
1163 * g_ascii_strtoull:
1164 * @nptr: the string to convert to a numeric value.
1165 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1166 * character after the last character used in the conversion.
1167 * @base: to be used for the conversion, 2..36 or 0
1168 *
1169 * Converts a string to a #guint64 value.
1170 * This function behaves like the standard strtoull() function
1171 * does in the C locale. It does this without actually
1172 * changing the current locale, since that would not be
1173 * thread-safe.
1174 *
1175 * Note that input with a leading minus sign (`-`) is accepted, and will return
1176 * the negation of the parsed number, unless that would overflow a #guint64.
1177 * Critically, this means you cannot assume that a short fixed length input will
1178 * never result in a low return value, as the input could have a leading `-`.
1179 *
1180 * This function is typically used when reading configuration
1181 * files or other non-user input that should be locale independent.
1182 * To handle input from the user you should normally use the
1183 * locale-sensitive system strtoull() function.
1184 *
1185 * If the correct value would cause overflow, %G_MAXUINT64
1186 * is returned, and `ERANGE` is stored in `errno`.
1187 * If the base is outside the valid range, zero is returned, and
1188 * `EINVAL` is stored in `errno`.
1189 * If the string conversion fails, zero is returned, and @endptr returns
1190 * @nptr (if @endptr is non-%NULL).
1191 *
1192 * Returns: the #guint64 value or zero on error.
1193 *
1194 * Since: 2.2
1195 */
1196 guint64
g_ascii_strtoull(const gchar * nptr,gchar ** endptr,guint base)1197 g_ascii_strtoull (const gchar *nptr,
1198 gchar **endptr,
1199 guint base)
1200 {
1201 #ifdef USE_XLOCALE
1202 return strtoull_l (nptr, endptr, base, get_C_locale ());
1203 #else
1204 gboolean negative;
1205 guint64 result;
1206
1207 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1208
1209 /* Return the result of the appropriate sign. */
1210 return negative ? -result : result;
1211 #endif
1212 }
1213
1214 /**
1215 * g_ascii_strtoll:
1216 * @nptr: the string to convert to a numeric value.
1217 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1218 * character after the last character used in the conversion.
1219 * @base: to be used for the conversion, 2..36 or 0
1220 *
1221 * Converts a string to a #gint64 value.
1222 * This function behaves like the standard strtoll() function
1223 * does in the C locale. It does this without actually
1224 * changing the current locale, since that would not be
1225 * thread-safe.
1226 *
1227 * This function is typically used when reading configuration
1228 * files or other non-user input that should be locale independent.
1229 * To handle input from the user you should normally use the
1230 * locale-sensitive system strtoll() function.
1231 *
1232 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1233 * is returned, and `ERANGE` is stored in `errno`.
1234 * If the base is outside the valid range, zero is returned, and
1235 * `EINVAL` is stored in `errno`. If the
1236 * string conversion fails, zero is returned, and @endptr returns @nptr
1237 * (if @endptr is non-%NULL).
1238 *
1239 * Returns: the #gint64 value or zero on error.
1240 *
1241 * Since: 2.12
1242 */
1243 gint64
g_ascii_strtoll(const gchar * nptr,gchar ** endptr,guint base)1244 g_ascii_strtoll (const gchar *nptr,
1245 gchar **endptr,
1246 guint base)
1247 {
1248 #ifdef USE_XLOCALE
1249 return strtoll_l (nptr, endptr, base, get_C_locale ());
1250 #else
1251 gboolean negative;
1252 guint64 result;
1253
1254 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1255
1256 if (negative && result > (guint64) G_MININT64)
1257 {
1258 errno = ERANGE;
1259 return G_MININT64;
1260 }
1261 else if (!negative && result > (guint64) G_MAXINT64)
1262 {
1263 errno = ERANGE;
1264 return G_MAXINT64;
1265 }
1266 else if (negative)
1267 return - (gint64) result;
1268 else
1269 return (gint64) result;
1270 #endif
1271 }
1272
1273 /**
1274 * g_strerror:
1275 * @errnum: the system error number. See the standard C %errno
1276 * documentation
1277 *
1278 * Returns a string corresponding to the given error code, e.g. "no
1279 * such process". Unlike strerror(), this always returns a string in
1280 * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1281 * the lifetime of the process.
1282 *
1283 * Note that the string may be translated according to the current locale.
1284 *
1285 * The value of %errno will not be changed by this function. However, it may
1286 * be changed by intermediate function calls, so you should save its value
1287 * as soon as the call returns:
1288 * |[
1289 * int saved_errno;
1290 *
1291 * ret = read (blah);
1292 * saved_errno = errno;
1293 *
1294 * g_strerror (saved_errno);
1295 * ]|
1296 *
1297 * Returns: a UTF-8 string describing the error code. If the error code
1298 * is unknown, it returns a string like "unknown error (<code>)".
1299 */
1300 const gchar *
g_strerror(gint errnum)1301 g_strerror (gint errnum)
1302 {
1303 static GHashTable *errors;
1304 G_LOCK_DEFINE_STATIC (errors);
1305 const gchar *msg;
1306 gint saved_errno = errno;
1307
1308 G_LOCK (errors);
1309 if (errors)
1310 msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1311 else
1312 {
1313 errors = g_hash_table_new (NULL, NULL);
1314 msg = NULL;
1315 }
1316
1317 if (!msg)
1318 {
1319 gchar buf[1024];
1320 GError *error = NULL;
1321
1322 #if defined(G_OS_WIN32)
1323 strerror_s (buf, sizeof (buf), errnum);
1324 msg = buf;
1325 #elif defined(HAVE_STRERROR_R)
1326 /* Match the condition in strerror_r(3) for glibc */
1327 # if defined(STRERROR_R_CHAR_P)
1328 msg = strerror_r (errnum, buf, sizeof (buf));
1329 # else
1330 (void) strerror_r (errnum, buf, sizeof (buf));
1331 msg = buf;
1332 # endif /* HAVE_STRERROR_R */
1333 #else
1334 g_strlcpy (buf, strerror (errnum), sizeof (buf));
1335 msg = buf;
1336 #endif
1337 if (!g_get_console_charset (NULL))
1338 {
1339 msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1340 if (error)
1341 g_print ("%s\n", error->message);
1342 }
1343 else if (msg == (const gchar *)buf)
1344 msg = g_strdup (buf);
1345
1346 g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1347 }
1348 G_UNLOCK (errors);
1349
1350 errno = saved_errno;
1351 return msg;
1352 }
1353
1354 /**
1355 * g_strsignal:
1356 * @signum: the signal number. See the `signal` documentation
1357 *
1358 * Returns a string describing the given signal, e.g. "Segmentation fault".
1359 * You should use this function in preference to strsignal(), because it
1360 * returns a string in UTF-8 encoding, and since not all platforms support
1361 * the strsignal() function.
1362 *
1363 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1364 * it returns "unknown signal (<signum>)".
1365 */
1366 const gchar *
g_strsignal(gint signum)1367 g_strsignal (gint signum)
1368 {
1369 gchar *msg;
1370 gchar *tofree;
1371 const gchar *ret;
1372
1373 msg = tofree = NULL;
1374
1375 #ifdef HAVE_STRSIGNAL
1376 msg = strsignal (signum);
1377 if (!g_get_console_charset (NULL))
1378 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1379 #endif
1380
1381 if (!msg)
1382 msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1383 ret = g_intern_string (msg);
1384 g_free (tofree);
1385
1386 return ret;
1387 }
1388
1389 /* Functions g_strlcpy and g_strlcat were originally developed by
1390 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1391 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1392 * for more information.
1393 */
1394
1395 #ifdef HAVE_STRLCPY
1396 /* Use the native ones, if available; they might be implemented in assembly */
1397 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1398 g_strlcpy (gchar *dest,
1399 const gchar *src,
1400 gsize dest_size)
1401 {
1402 g_return_val_if_fail (dest != NULL, 0);
1403 g_return_val_if_fail (src != NULL, 0);
1404
1405 return strlcpy (dest, src, dest_size);
1406 }
1407
1408 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1409 g_strlcat (gchar *dest,
1410 const gchar *src,
1411 gsize dest_size)
1412 {
1413 g_return_val_if_fail (dest != NULL, 0);
1414 g_return_val_if_fail (src != NULL, 0);
1415
1416 return strlcat (dest, src, dest_size);
1417 }
1418
1419 #else /* ! HAVE_STRLCPY */
1420 /**
1421 * g_strlcpy:
1422 * @dest: destination buffer
1423 * @src: source buffer
1424 * @dest_size: length of @dest in bytes
1425 *
1426 * Portability wrapper that calls strlcpy() on systems which have it,
1427 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1428 * guaranteed to be nul-terminated; @src must be nul-terminated;
1429 * @dest_size is the buffer size, not the number of bytes to copy.
1430 *
1431 * At most @dest_size - 1 characters will be copied. Always nul-terminates
1432 * (unless @dest_size is 0). This function does not allocate memory. Unlike
1433 * strncpy(), this function doesn't pad @dest (so it's often faster). It
1434 * returns the size of the attempted result, strlen (src), so if
1435 * @retval >= @dest_size, truncation occurred.
1436 *
1437 * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1438 * but if you really want to avoid screwups, g_strdup() is an even better
1439 * idea.
1440 *
1441 * Returns: length of @src
1442 */
1443 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1444 g_strlcpy (gchar *dest,
1445 const gchar *src,
1446 gsize dest_size)
1447 {
1448 gchar *d = dest;
1449 const gchar *s = src;
1450 gsize n = dest_size;
1451
1452 g_return_val_if_fail (dest != NULL, 0);
1453 g_return_val_if_fail (src != NULL, 0);
1454
1455 /* Copy as many bytes as will fit */
1456 if (n != 0 && --n != 0)
1457 do
1458 {
1459 gchar c = *s++;
1460
1461 *d++ = c;
1462 if (c == 0)
1463 break;
1464 }
1465 while (--n != 0);
1466
1467 /* If not enough room in dest, add NUL and traverse rest of src */
1468 if (n == 0)
1469 {
1470 if (dest_size != 0)
1471 *d = 0;
1472 while (*s++)
1473 ;
1474 }
1475
1476 return s - src - 1; /* count does not include NUL */
1477 }
1478
1479 /**
1480 * g_strlcat:
1481 * @dest: destination buffer, already containing one nul-terminated string
1482 * @src: source buffer
1483 * @dest_size: length of @dest buffer in bytes (not length of existing string
1484 * inside @dest)
1485 *
1486 * Portability wrapper that calls strlcat() on systems which have it,
1487 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1488 * guaranteeing nul-termination for @dest. The total size of @dest won't
1489 * exceed @dest_size.
1490 *
1491 * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1492 * @dest_size is the full size of dest, not the space left over. This
1493 * function does not allocate memory. It always nul-terminates (unless
1494 * @dest_size == 0 or there were no nul characters in the @dest_size
1495 * characters of dest to start with).
1496 *
1497 * Caveat: this is supposedly a more secure alternative to strcat() or
1498 * strncat(), but for real security g_strconcat() is harder to mess up.
1499 *
1500 * Returns: size of attempted result, which is MIN (dest_size, strlen
1501 * (original dest)) + strlen (src), so if retval >= dest_size,
1502 * truncation occurred.
1503 */
1504 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1505 g_strlcat (gchar *dest,
1506 const gchar *src,
1507 gsize dest_size)
1508 {
1509 gchar *d = dest;
1510 const gchar *s = src;
1511 gsize bytes_left = dest_size;
1512 gsize dlength; /* Logically, MIN (strlen (d), dest_size) */
1513
1514 g_return_val_if_fail (dest != NULL, 0);
1515 g_return_val_if_fail (src != NULL, 0);
1516
1517 /* Find the end of dst and adjust bytes left but don't go past end */
1518 while (*d != 0 && bytes_left-- != 0)
1519 d++;
1520 dlength = d - dest;
1521 bytes_left = dest_size - dlength;
1522
1523 if (bytes_left == 0)
1524 return dlength + strlen (s);
1525
1526 while (*s != 0)
1527 {
1528 if (bytes_left != 1)
1529 {
1530 *d++ = *s;
1531 bytes_left--;
1532 }
1533 s++;
1534 }
1535 *d = 0;
1536
1537 return dlength + (s - src); /* count does not include NUL */
1538 }
1539 #endif /* ! HAVE_STRLCPY */
1540
1541 /**
1542 * g_ascii_strdown:
1543 * @str: a string
1544 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1545 *
1546 * Converts all upper case ASCII letters to lower case ASCII letters.
1547 *
1548 * Returns: a newly-allocated string, with all the upper case
1549 * characters in @str converted to lower case, with semantics that
1550 * exactly match g_ascii_tolower(). (Note that this is unlike the
1551 * old g_strdown(), which modified the string in place.)
1552 */
1553 gchar*
g_ascii_strdown(const gchar * str,gssize len)1554 g_ascii_strdown (const gchar *str,
1555 gssize len)
1556 {
1557 gchar *result, *s;
1558
1559 g_return_val_if_fail (str != NULL, NULL);
1560
1561 if (len < 0)
1562 len = (gssize) strlen (str);
1563
1564 result = g_strndup (str, (gsize) len);
1565 for (s = result; *s; s++)
1566 *s = g_ascii_tolower (*s);
1567
1568 return result;
1569 }
1570
1571 /**
1572 * g_ascii_strup:
1573 * @str: a string
1574 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1575 *
1576 * Converts all lower case ASCII letters to upper case ASCII letters.
1577 *
1578 * Returns: a newly allocated string, with all the lower case
1579 * characters in @str converted to upper case, with semantics that
1580 * exactly match g_ascii_toupper(). (Note that this is unlike the
1581 * old g_strup(), which modified the string in place.)
1582 */
1583 gchar*
g_ascii_strup(const gchar * str,gssize len)1584 g_ascii_strup (const gchar *str,
1585 gssize len)
1586 {
1587 gchar *result, *s;
1588
1589 g_return_val_if_fail (str != NULL, NULL);
1590
1591 if (len < 0)
1592 len = (gssize) strlen (str);
1593
1594 result = g_strndup (str, (gsize) len);
1595 for (s = result; *s; s++)
1596 *s = g_ascii_toupper (*s);
1597
1598 return result;
1599 }
1600
1601 /**
1602 * g_str_is_ascii:
1603 * @str: a string
1604 *
1605 * Determines if a string is pure ASCII. A string is pure ASCII if it
1606 * contains no bytes with the high bit set.
1607 *
1608 * Returns: %TRUE if @str is ASCII
1609 *
1610 * Since: 2.40
1611 */
1612 gboolean
g_str_is_ascii(const gchar * str)1613 g_str_is_ascii (const gchar *str)
1614 {
1615 gint i;
1616
1617 for (i = 0; str[i]; i++)
1618 if (str[i] & 0x80)
1619 return FALSE;
1620
1621 return TRUE;
1622 }
1623
1624 /**
1625 * g_strdown:
1626 * @string: the string to convert.
1627 *
1628 * Converts a string to lower case.
1629 *
1630 * Returns: the string
1631 *
1632 * Deprecated:2.2: This function is totally broken for the reasons discussed
1633 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1634 * instead.
1635 **/
1636 gchar*
g_strdown(gchar * string)1637 g_strdown (gchar *string)
1638 {
1639 guchar *s;
1640
1641 g_return_val_if_fail (string != NULL, NULL);
1642
1643 s = (guchar *) string;
1644
1645 while (*s)
1646 {
1647 if (isupper (*s))
1648 *s = tolower (*s);
1649 s++;
1650 }
1651
1652 return (gchar *) string;
1653 }
1654
1655 /**
1656 * g_strup:
1657 * @string: the string to convert
1658 *
1659 * Converts a string to upper case.
1660 *
1661 * Returns: the string
1662 *
1663 * Deprecated:2.2: This function is totally broken for the reasons
1664 * discussed in the g_strncasecmp() docs - use g_ascii_strup()
1665 * or g_utf8_strup() instead.
1666 */
1667 gchar*
g_strup(gchar * string)1668 g_strup (gchar *string)
1669 {
1670 guchar *s;
1671
1672 g_return_val_if_fail (string != NULL, NULL);
1673
1674 s = (guchar *) string;
1675
1676 while (*s)
1677 {
1678 if (islower (*s))
1679 *s = toupper (*s);
1680 s++;
1681 }
1682
1683 return (gchar *) string;
1684 }
1685
1686 /**
1687 * g_strreverse:
1688 * @string: the string to reverse
1689 *
1690 * Reverses all of the bytes in a string. For example,
1691 * `g_strreverse ("abcdef")` will result in "fedcba".
1692 *
1693 * Note that g_strreverse() doesn't work on UTF-8 strings
1694 * containing multibyte characters. For that purpose, use
1695 * g_utf8_strreverse().
1696 *
1697 * Returns: the same pointer passed in as @string
1698 */
1699 gchar*
g_strreverse(gchar * string)1700 g_strreverse (gchar *string)
1701 {
1702 g_return_val_if_fail (string != NULL, NULL);
1703
1704 if (*string)
1705 {
1706 gchar *h, *t;
1707
1708 h = string;
1709 t = string + strlen (string) - 1;
1710
1711 while (h < t)
1712 {
1713 gchar c;
1714
1715 c = *h;
1716 *h = *t;
1717 h++;
1718 *t = c;
1719 t--;
1720 }
1721 }
1722
1723 return string;
1724 }
1725
1726 /**
1727 * g_ascii_tolower:
1728 * @c: any character
1729 *
1730 * Convert a character to ASCII lower case.
1731 *
1732 * Unlike the standard C library tolower() function, this only
1733 * recognizes standard ASCII letters and ignores the locale, returning
1734 * all non-ASCII characters unchanged, even if they are lower case
1735 * letters in a particular character set. Also unlike the standard
1736 * library function, this takes and returns a char, not an int, so
1737 * don't call it on %EOF but no need to worry about casting to #guchar
1738 * before passing a possibly non-ASCII character in.
1739 *
1740 * Returns: the result of converting @c to lower case. If @c is
1741 * not an ASCII upper case letter, @c is returned unchanged.
1742 */
1743 gchar
g_ascii_tolower(gchar c)1744 g_ascii_tolower (gchar c)
1745 {
1746 return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1747 }
1748
1749 /**
1750 * g_ascii_toupper:
1751 * @c: any character
1752 *
1753 * Convert a character to ASCII upper case.
1754 *
1755 * Unlike the standard C library toupper() function, this only
1756 * recognizes standard ASCII letters and ignores the locale, returning
1757 * all non-ASCII characters unchanged, even if they are upper case
1758 * letters in a particular character set. Also unlike the standard
1759 * library function, this takes and returns a char, not an int, so
1760 * don't call it on %EOF but no need to worry about casting to #guchar
1761 * before passing a possibly non-ASCII character in.
1762 *
1763 * Returns: the result of converting @c to upper case. If @c is not
1764 * an ASCII lower case letter, @c is returned unchanged.
1765 */
1766 gchar
g_ascii_toupper(gchar c)1767 g_ascii_toupper (gchar c)
1768 {
1769 return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1770 }
1771
1772 /**
1773 * g_ascii_digit_value:
1774 * @c: an ASCII character
1775 *
1776 * Determines the numeric value of a character as a decimal digit.
1777 * Differs from g_unichar_digit_value() because it takes a char, so
1778 * there's no worry about sign extension if characters are signed.
1779 *
1780 * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1781 * its numeric value. Otherwise, -1.
1782 */
1783 int
g_ascii_digit_value(gchar c)1784 g_ascii_digit_value (gchar c)
1785 {
1786 if (g_ascii_isdigit (c))
1787 return c - '0';
1788 return -1;
1789 }
1790
1791 /**
1792 * g_ascii_xdigit_value:
1793 * @c: an ASCII character.
1794 *
1795 * Determines the numeric value of a character as a hexidecimal
1796 * digit. Differs from g_unichar_xdigit_value() because it takes
1797 * a char, so there's no worry about sign extension if characters
1798 * are signed.
1799 *
1800 * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1801 * its numeric value. Otherwise, -1.
1802 */
1803 int
g_ascii_xdigit_value(gchar c)1804 g_ascii_xdigit_value (gchar c)
1805 {
1806 if (c >= 'A' && c <= 'F')
1807 return c - 'A' + 10;
1808 if (c >= 'a' && c <= 'f')
1809 return c - 'a' + 10;
1810 return g_ascii_digit_value (c);
1811 }
1812
1813 /**
1814 * g_ascii_strcasecmp:
1815 * @s1: string to compare with @s2
1816 * @s2: string to compare with @s1
1817 *
1818 * Compare two strings, ignoring the case of ASCII characters.
1819 *
1820 * Unlike the BSD strcasecmp() function, this only recognizes standard
1821 * ASCII letters and ignores the locale, treating all non-ASCII
1822 * bytes as if they are not letters.
1823 *
1824 * This function should be used only on strings that are known to be
1825 * in encodings where the bytes corresponding to ASCII letters always
1826 * represent themselves. This includes UTF-8 and the ISO-8859-*
1827 * charsets, but not for instance double-byte encodings like the
1828 * Windows Codepage 932, where the trailing bytes of double-byte
1829 * characters include all ASCII letters. If you compare two CP932
1830 * strings using this function, you will get false matches.
1831 *
1832 * Both @s1 and @s2 must be non-%NULL.
1833 *
1834 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1835 * or a positive value if @s1 > @s2.
1836 */
1837 gint
g_ascii_strcasecmp(const gchar * s1,const gchar * s2)1838 g_ascii_strcasecmp (const gchar *s1,
1839 const gchar *s2)
1840 {
1841 gint c1, c2;
1842
1843 g_return_val_if_fail (s1 != NULL, 0);
1844 g_return_val_if_fail (s2 != NULL, 0);
1845
1846 while (*s1 && *s2)
1847 {
1848 c1 = (gint)(guchar) TOLOWER (*s1);
1849 c2 = (gint)(guchar) TOLOWER (*s2);
1850 if (c1 != c2)
1851 return (c1 - c2);
1852 s1++; s2++;
1853 }
1854
1855 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1856 }
1857
1858 /**
1859 * g_ascii_strncasecmp:
1860 * @s1: string to compare with @s2
1861 * @s2: string to compare with @s1
1862 * @n: number of characters to compare
1863 *
1864 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1865 * characters after the first @n in each string.
1866 *
1867 * Unlike the BSD strcasecmp() function, this only recognizes standard
1868 * ASCII letters and ignores the locale, treating all non-ASCII
1869 * characters as if they are not letters.
1870 *
1871 * The same warning as in g_ascii_strcasecmp() applies: Use this
1872 * function only on strings known to be in encodings where bytes
1873 * corresponding to ASCII letters always represent themselves.
1874 *
1875 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1876 * or a positive value if @s1 > @s2.
1877 */
1878 gint
g_ascii_strncasecmp(const gchar * s1,const gchar * s2,gsize n)1879 g_ascii_strncasecmp (const gchar *s1,
1880 const gchar *s2,
1881 gsize n)
1882 {
1883 gint c1, c2;
1884
1885 g_return_val_if_fail (s1 != NULL, 0);
1886 g_return_val_if_fail (s2 != NULL, 0);
1887
1888 while (n && *s1 && *s2)
1889 {
1890 n -= 1;
1891 c1 = (gint)(guchar) TOLOWER (*s1);
1892 c2 = (gint)(guchar) TOLOWER (*s2);
1893 if (c1 != c2)
1894 return (c1 - c2);
1895 s1++; s2++;
1896 }
1897
1898 if (n)
1899 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1900 else
1901 return 0;
1902 }
1903
1904 /**
1905 * g_strcasecmp:
1906 * @s1: a string
1907 * @s2: a string to compare with @s1
1908 *
1909 * A case-insensitive string comparison, corresponding to the standard
1910 * strcasecmp() function on platforms which support it.
1911 *
1912 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1913 * or a positive value if @s1 > @s2.
1914 *
1915 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1916 * function is deprecated and how to replace it.
1917 */
1918 gint
g_strcasecmp(const gchar * s1,const gchar * s2)1919 g_strcasecmp (const gchar *s1,
1920 const gchar *s2)
1921 {
1922 #ifdef HAVE_STRCASECMP
1923 g_return_val_if_fail (s1 != NULL, 0);
1924 g_return_val_if_fail (s2 != NULL, 0);
1925
1926 return strcasecmp (s1, s2);
1927 #else
1928 gint c1, c2;
1929
1930 g_return_val_if_fail (s1 != NULL, 0);
1931 g_return_val_if_fail (s2 != NULL, 0);
1932
1933 while (*s1 && *s2)
1934 {
1935 /* According to A. Cox, some platforms have islower's that
1936 * don't work right on non-uppercase
1937 */
1938 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1939 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1940 if (c1 != c2)
1941 return (c1 - c2);
1942 s1++; s2++;
1943 }
1944
1945 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1946 #endif
1947 }
1948
1949 /**
1950 * g_strncasecmp:
1951 * @s1: a string
1952 * @s2: a string to compare with @s1
1953 * @n: the maximum number of characters to compare
1954 *
1955 * A case-insensitive string comparison, corresponding to the standard
1956 * strncasecmp() function on platforms which support it. It is similar
1957 * to g_strcasecmp() except it only compares the first @n characters of
1958 * the strings.
1959 *
1960 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1961 * or a positive value if @s1 > @s2.
1962 *
1963 * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1964 * the comparison by calling toupper()/tolower(). These functions
1965 * are locale-specific and operate on single bytes. However, it is
1966 * impossible to handle things correctly from an internationalization
1967 * standpoint by operating on bytes, since characters may be multibyte.
1968 * Thus g_strncasecmp() is broken if your string is guaranteed to be
1969 * ASCII, since it is locale-sensitive, and it's broken if your string
1970 * is localized, since it doesn't work on many encodings at all,
1971 * including UTF-8, EUC-JP, etc.
1972 *
1973 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1974 * which only works on ASCII and is not locale-sensitive, and
1975 * g_utf8_casefold() followed by strcmp() on the resulting strings,
1976 * which is good for case-insensitive sorting of UTF-8.
1977 */
1978 gint
g_strncasecmp(const gchar * s1,const gchar * s2,guint n)1979 g_strncasecmp (const gchar *s1,
1980 const gchar *s2,
1981 guint n)
1982 {
1983 #ifdef HAVE_STRNCASECMP
1984 return strncasecmp (s1, s2, n);
1985 #else
1986 gint c1, c2;
1987
1988 g_return_val_if_fail (s1 != NULL, 0);
1989 g_return_val_if_fail (s2 != NULL, 0);
1990
1991 while (n && *s1 && *s2)
1992 {
1993 n -= 1;
1994 /* According to A. Cox, some platforms have islower's that
1995 * don't work right on non-uppercase
1996 */
1997 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1998 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1999 if (c1 != c2)
2000 return (c1 - c2);
2001 s1++; s2++;
2002 }
2003
2004 if (n)
2005 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
2006 else
2007 return 0;
2008 #endif
2009 }
2010
2011 /**
2012 * g_strdelimit:
2013 * @string: the string to convert
2014 * @delimiters: (nullable): a string containing the current delimiters,
2015 * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
2016 * @new_delimiter: the new delimiter character
2017 *
2018 * Converts any delimiter characters in @string to @new_delimiter.
2019 * Any characters in @string which are found in @delimiters are
2020 * changed to the @new_delimiter character. Modifies @string in place,
2021 * and returns @string itself, not a copy. The return value is to
2022 * allow nesting such as
2023 * |[<!-- language="C" -->
2024 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
2025 * ]|
2026 *
2027 * In order to modify a copy, you may use `g_strdup()`:
2028 * |[<!-- language="C" -->
2029 * reformatted = g_strdelimit (g_strdup (const_str), "abc", '?');
2030 * ...
2031 * g_free (reformatted);
2032 * ]|
2033 *
2034 * Returns: @string
2035 */
2036 gchar *
g_strdelimit(gchar * string,const gchar * delimiters,gchar new_delim)2037 g_strdelimit (gchar *string,
2038 const gchar *delimiters,
2039 gchar new_delim)
2040 {
2041 gchar *c;
2042
2043 g_return_val_if_fail (string != NULL, NULL);
2044
2045 if (!delimiters)
2046 delimiters = G_STR_DELIMITERS;
2047
2048 for (c = string; *c; c++)
2049 {
2050 if (strchr (delimiters, *c))
2051 *c = new_delim;
2052 }
2053
2054 return string;
2055 }
2056
2057 /**
2058 * g_strcanon:
2059 * @string: a nul-terminated array of bytes
2060 * @valid_chars: bytes permitted in @string
2061 * @substitutor: replacement character for disallowed bytes
2062 *
2063 * For each character in @string, if the character is not in @valid_chars,
2064 * replaces the character with @substitutor. Modifies @string in place,
2065 * and return @string itself, not a copy. The return value is to allow
2066 * nesting such as
2067 * |[<!-- language="C" -->
2068 * g_ascii_strup (g_strcanon (str, "abc", '?'))
2069 * ]|
2070 *
2071 * In order to modify a copy, you may use `g_strdup()`:
2072 * |[<!-- language="C" -->
2073 * reformatted = g_strcanon (g_strdup (const_str), "abc", '?');
2074 * ...
2075 * g_free (reformatted);
2076 * ]|
2077 *
2078 * Returns: @string
2079 */
2080 gchar *
g_strcanon(gchar * string,const gchar * valid_chars,gchar substitutor)2081 g_strcanon (gchar *string,
2082 const gchar *valid_chars,
2083 gchar substitutor)
2084 {
2085 gchar *c;
2086
2087 g_return_val_if_fail (string != NULL, NULL);
2088 g_return_val_if_fail (valid_chars != NULL, NULL);
2089
2090 for (c = string; *c; c++)
2091 {
2092 if (!strchr (valid_chars, *c))
2093 *c = substitutor;
2094 }
2095
2096 return string;
2097 }
2098
2099 /**
2100 * g_strcompress:
2101 * @source: a string to compress
2102 *
2103 * Replaces all escaped characters with their one byte equivalent.
2104 *
2105 * This function does the reverse conversion of g_strescape().
2106 *
2107 * Returns: a newly-allocated copy of @source with all escaped
2108 * character compressed
2109 */
2110 gchar *
g_strcompress(const gchar * source)2111 g_strcompress (const gchar *source)
2112 {
2113 const gchar *p = source, *octal;
2114 gchar *dest;
2115 gchar *q;
2116
2117 g_return_val_if_fail (source != NULL, NULL);
2118
2119 dest = g_malloc (strlen (source) + 1);
2120 q = dest;
2121
2122 while (*p)
2123 {
2124 if (*p == '\\')
2125 {
2126 p++;
2127 switch (*p)
2128 {
2129 case '\0':
2130 g_warning ("g_strcompress: trailing \\");
2131 goto out;
2132 case '0': case '1': case '2': case '3': case '4':
2133 case '5': case '6': case '7':
2134 *q = 0;
2135 octal = p;
2136 while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2137 {
2138 *q = (*q * 8) + (*p - '0');
2139 p++;
2140 }
2141 q++;
2142 p--;
2143 break;
2144 case 'b':
2145 *q++ = '\b';
2146 break;
2147 case 'f':
2148 *q++ = '\f';
2149 break;
2150 case 'n':
2151 *q++ = '\n';
2152 break;
2153 case 'r':
2154 *q++ = '\r';
2155 break;
2156 case 't':
2157 *q++ = '\t';
2158 break;
2159 case 'v':
2160 *q++ = '\v';
2161 break;
2162 default: /* Also handles \" and \\ */
2163 *q++ = *p;
2164 break;
2165 }
2166 }
2167 else
2168 *q++ = *p;
2169 p++;
2170 }
2171 out:
2172 *q = 0;
2173
2174 return dest;
2175 }
2176
2177 /**
2178 * g_strescape:
2179 * @source: a string to escape
2180 * @exceptions: (nullable): a string of characters not to escape in @source
2181 *
2182 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2183 * and '"' in the string @source by inserting a '\' before
2184 * them. Additionally all characters in the range 0x01-0x1F (everything
2185 * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2186 * replaced with a '\' followed by their octal representation.
2187 * Characters supplied in @exceptions are not escaped.
2188 *
2189 * g_strcompress() does the reverse conversion.
2190 *
2191 * Returns: a newly-allocated copy of @source with certain
2192 * characters escaped. See above.
2193 */
2194 gchar *
g_strescape(const gchar * source,const gchar * exceptions)2195 g_strescape (const gchar *source,
2196 const gchar *exceptions)
2197 {
2198 const guchar *p;
2199 gchar *dest;
2200 gchar *q;
2201 guchar excmap[256];
2202
2203 g_return_val_if_fail (source != NULL, NULL);
2204
2205 p = (guchar *) source;
2206 /* Each source byte needs maximally four destination chars (\777) */
2207 q = dest = g_malloc (strlen (source) * 4 + 1);
2208
2209 memset (excmap, 0, 256);
2210 if (exceptions)
2211 {
2212 guchar *e = (guchar *) exceptions;
2213
2214 while (*e)
2215 {
2216 excmap[*e] = 1;
2217 e++;
2218 }
2219 }
2220
2221 while (*p)
2222 {
2223 if (excmap[*p])
2224 *q++ = *p;
2225 else
2226 {
2227 switch (*p)
2228 {
2229 case '\b':
2230 *q++ = '\\';
2231 *q++ = 'b';
2232 break;
2233 case '\f':
2234 *q++ = '\\';
2235 *q++ = 'f';
2236 break;
2237 case '\n':
2238 *q++ = '\\';
2239 *q++ = 'n';
2240 break;
2241 case '\r':
2242 *q++ = '\\';
2243 *q++ = 'r';
2244 break;
2245 case '\t':
2246 *q++ = '\\';
2247 *q++ = 't';
2248 break;
2249 case '\v':
2250 *q++ = '\\';
2251 *q++ = 'v';
2252 break;
2253 case '\\':
2254 *q++ = '\\';
2255 *q++ = '\\';
2256 break;
2257 case '"':
2258 *q++ = '\\';
2259 *q++ = '"';
2260 break;
2261 default:
2262 if ((*p < ' ') || (*p >= 0177))
2263 {
2264 *q++ = '\\';
2265 *q++ = '0' + (((*p) >> 6) & 07);
2266 *q++ = '0' + (((*p) >> 3) & 07);
2267 *q++ = '0' + ((*p) & 07);
2268 }
2269 else
2270 *q++ = *p;
2271 break;
2272 }
2273 }
2274 p++;
2275 }
2276 *q = 0;
2277 return dest;
2278 }
2279
2280 /**
2281 * g_strchug:
2282 * @string: a string to remove the leading whitespace from
2283 *
2284 * Removes leading whitespace from a string, by moving the rest
2285 * of the characters forward.
2286 *
2287 * This function doesn't allocate or reallocate any memory;
2288 * it modifies @string in place. Therefore, it cannot be used on
2289 * statically allocated strings.
2290 *
2291 * The pointer to @string is returned to allow the nesting of functions.
2292 *
2293 * Also see g_strchomp() and g_strstrip().
2294 *
2295 * Returns: @string
2296 */
2297 gchar *
g_strchug(gchar * string)2298 g_strchug (gchar *string)
2299 {
2300 guchar *start;
2301
2302 g_return_val_if_fail (string != NULL, NULL);
2303
2304 for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2305 ;
2306
2307 memmove (string, start, strlen ((gchar *) start) + 1);
2308
2309 return string;
2310 }
2311
2312 /**
2313 * g_strchomp:
2314 * @string: a string to remove the trailing whitespace from
2315 *
2316 * Removes trailing whitespace from a string.
2317 *
2318 * This function doesn't allocate or reallocate any memory;
2319 * it modifies @string in place. Therefore, it cannot be used
2320 * on statically allocated strings.
2321 *
2322 * The pointer to @string is returned to allow the nesting of functions.
2323 *
2324 * Also see g_strchug() and g_strstrip().
2325 *
2326 * Returns: @string
2327 */
2328 gchar *
g_strchomp(gchar * string)2329 g_strchomp (gchar *string)
2330 {
2331 gsize len;
2332
2333 g_return_val_if_fail (string != NULL, NULL);
2334
2335 len = strlen (string);
2336 while (len--)
2337 {
2338 if (g_ascii_isspace ((guchar) string[len]))
2339 string[len] = '\0';
2340 else
2341 break;
2342 }
2343
2344 return string;
2345 }
2346
2347 /**
2348 * g_strsplit:
2349 * @string: a string to split
2350 * @delimiter: a string which specifies the places at which to split
2351 * the string. The delimiter is not included in any of the resulting
2352 * strings, unless @max_tokens is reached.
2353 * @max_tokens: the maximum number of pieces to split @string into.
2354 * If this is less than 1, the string is split completely.
2355 *
2356 * Splits a string into a maximum of @max_tokens pieces, using the given
2357 * @delimiter. If @max_tokens is reached, the remainder of @string is
2358 * appended to the last token.
2359 *
2360 * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2361 * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2362 * and "".
2363 *
2364 * As a special case, the result of splitting the empty string "" is an empty
2365 * vector, not a vector containing a single string. The reason for this
2366 * special case is that being able to represent an empty vector is typically
2367 * more useful than consistent handling of empty elements. If you do need
2368 * to represent empty elements, you'll need to check for the empty string
2369 * before calling g_strsplit().
2370 *
2371 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2372 * g_strfreev() to free it.
2373 */
2374 gchar**
g_strsplit(const gchar * string,const gchar * delimiter,gint max_tokens)2375 g_strsplit (const gchar *string,
2376 const gchar *delimiter,
2377 gint max_tokens)
2378 {
2379 GSList *string_list = NULL, *slist;
2380 gchar **str_array, *s;
2381 guint n = 0;
2382 const gchar *remainder;
2383
2384 g_return_val_if_fail (string != NULL, NULL);
2385 g_return_val_if_fail (delimiter != NULL, NULL);
2386 g_return_val_if_fail (delimiter[0] != '\0', NULL);
2387
2388 if (max_tokens < 1)
2389 max_tokens = G_MAXINT;
2390
2391 remainder = string;
2392 s = strstr (remainder, delimiter);
2393 if (s)
2394 {
2395 gsize delimiter_len = strlen (delimiter);
2396
2397 while (--max_tokens && s)
2398 {
2399 gsize len;
2400
2401 len = s - remainder;
2402 string_list = g_slist_prepend (string_list,
2403 g_strndup (remainder, len));
2404 n++;
2405 remainder = s + delimiter_len;
2406 s = strstr (remainder, delimiter);
2407 }
2408 }
2409 if (*string)
2410 {
2411 n++;
2412 string_list = g_slist_prepend (string_list, g_strdup (remainder));
2413 }
2414
2415 str_array = g_new (gchar*, n + 1);
2416
2417 str_array[n--] = NULL;
2418 for (slist = string_list; slist; slist = slist->next)
2419 str_array[n--] = slist->data;
2420
2421 g_slist_free (string_list);
2422
2423 return str_array;
2424 }
2425
2426 /**
2427 * g_strsplit_set:
2428 * @string: The string to be tokenized
2429 * @delimiters: A nul-terminated string containing bytes that are used
2430 * to split the string.
2431 * @max_tokens: The maximum number of tokens to split @string into.
2432 * If this is less than 1, the string is split completely
2433 *
 * Splits @string into a number of tokens not containing any of the characters
 * in @delimiters. A token is the (possibly empty) longest string that does not
2436 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2437 * remainder is appended to the last token.
2438 *
2439 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2440 * %NULL-terminated vector containing the three strings "abc", "def",
2441 * and "ghi".
2442 *
2443 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2444 * vector containing the four strings "", "def", "ghi", and "".
2445 *
2446 * As a special case, the result of splitting the empty string "" is an empty
2447 * vector, not a vector containing a single string. The reason for this
2448 * special case is that being able to represent an empty vector is typically
2449 * more useful than consistent handling of empty elements. If you do need
2450 * to represent empty elements, you'll need to check for the empty string
2451 * before calling g_strsplit_set().
2452 *
2453 * Note that this function works on bytes not characters, so it can't be used
2454 * to delimit UTF-8 strings for anything but ASCII characters.
2455 *
2456 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2457 * g_strfreev() to free it.
2458 *
2459 * Since: 2.4
2460 **/
2461 gchar **
g_strsplit_set(const gchar * string,const gchar * delimiters,gint max_tokens)2462 g_strsplit_set (const gchar *string,
2463 const gchar *delimiters,
2464 gint max_tokens)
2465 {
2466 gboolean delim_table[256];
2467 GSList *tokens, *list;
2468 gint n_tokens;
2469 const gchar *s;
2470 const gchar *current;
2471 gchar *token;
2472 gchar **result;
2473
2474 g_return_val_if_fail (string != NULL, NULL);
2475 g_return_val_if_fail (delimiters != NULL, NULL);
2476
2477 if (max_tokens < 1)
2478 max_tokens = G_MAXINT;
2479
2480 if (*string == '\0')
2481 {
2482 result = g_new (char *, 1);
2483 result[0] = NULL;
2484 return result;
2485 }
2486
2487 memset (delim_table, FALSE, sizeof (delim_table));
2488 for (s = delimiters; *s != '\0'; ++s)
2489 delim_table[*(guchar *)s] = TRUE;
2490
2491 tokens = NULL;
2492 n_tokens = 0;
2493
2494 s = current = string;
2495 while (*s != '\0')
2496 {
2497 if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2498 {
2499 token = g_strndup (current, s - current);
2500 tokens = g_slist_prepend (tokens, token);
2501 ++n_tokens;
2502
2503 current = s + 1;
2504 }
2505
2506 ++s;
2507 }
2508
2509 token = g_strndup (current, s - current);
2510 tokens = g_slist_prepend (tokens, token);
2511 ++n_tokens;
2512
2513 result = g_new (gchar *, n_tokens + 1);
2514
2515 result[n_tokens] = NULL;
2516 for (list = tokens; list != NULL; list = list->next)
2517 result[--n_tokens] = list->data;
2518
2519 g_slist_free (tokens);
2520
2521 return result;
2522 }
2523
2524 /**
2525 * GStrv:
2526 *
2527 * A typedef alias for gchar**. This is mostly useful when used together with
2528 * g_auto().
2529 */
2530
2531 /**
2532 * g_strfreev:
2533 * @str_array: (nullable): a %NULL-terminated array of strings to free
2534 *
2535 * Frees a %NULL-terminated array of strings, as well as each
2536 * string it contains.
2537 *
2538 * If @str_array is %NULL, this function simply returns.
2539 */
2540 void
g_strfreev(gchar ** str_array)2541 g_strfreev (gchar **str_array)
2542 {
2543 if (str_array)
2544 {
2545 int i;
2546
2547 for (i = 0; str_array[i] != NULL; i++)
2548 g_free (str_array[i]);
2549
2550 g_free (str_array);
2551 }
2552 }
2553
2554 /**
2555 * g_strdupv:
2556 * @str_array: (nullable): a %NULL-terminated array of strings
2557 *
 * Copies a %NULL-terminated array of strings. The copy is a deep copy;
2559 * the new array should be freed by first freeing each string, then
2560 * the array itself. g_strfreev() does this for you. If called
2561 * on a %NULL value, g_strdupv() simply returns %NULL.
2562 *
2563 * Returns: (nullable): a new %NULL-terminated array of strings.
2564 */
2565 gchar**
g_strdupv(gchar ** str_array)2566 g_strdupv (gchar **str_array)
2567 {
2568 if (str_array)
2569 {
2570 gint i;
2571 gchar **retval;
2572
2573 i = 0;
2574 while (str_array[i])
2575 ++i;
2576
2577 retval = g_new (gchar*, i + 1);
2578
2579 i = 0;
2580 while (str_array[i])
2581 {
2582 retval[i] = g_strdup (str_array[i]);
2583 ++i;
2584 }
2585 retval[i] = NULL;
2586
2587 return retval;
2588 }
2589 else
2590 return NULL;
2591 }
2592
2593 /**
2594 * g_strjoinv:
2595 * @separator: (nullable): a string to insert between each of the
2596 * strings, or %NULL
2597 * @str_array: a %NULL-terminated array of strings to join
2598 *
2599 * Joins a number of strings together to form one long string, with the
2600 * optional @separator inserted between each of them. The returned string
2601 * should be freed with g_free().
2602 *
2603 * If @str_array has no items, the return value will be an
2604 * empty string. If @str_array contains a single item, @separator will not
2605 * appear in the resulting string.
2606 *
2607 * Returns: a newly-allocated string containing all of the strings joined
2608 * together, with @separator between them
2609 */
2610 gchar*
g_strjoinv(const gchar * separator,gchar ** str_array)2611 g_strjoinv (const gchar *separator,
2612 gchar **str_array)
2613 {
2614 gchar *string;
2615 gchar *ptr;
2616
2617 g_return_val_if_fail (str_array != NULL, NULL);
2618
2619 if (separator == NULL)
2620 separator = "";
2621
2622 if (*str_array)
2623 {
2624 gint i;
2625 gsize len;
2626 gsize separator_len;
2627
2628 separator_len = strlen (separator);
2629 /* First part, getting length */
2630 len = 1 + strlen (str_array[0]);
2631 for (i = 1; str_array[i] != NULL; i++)
2632 len += strlen (str_array[i]);
2633 len += separator_len * (i - 1);
2634
2635 /* Second part, building string */
2636 string = g_new (gchar, len);
2637 ptr = g_stpcpy (string, *str_array);
2638 for (i = 1; str_array[i] != NULL; i++)
2639 {
2640 ptr = g_stpcpy (ptr, separator);
2641 ptr = g_stpcpy (ptr, str_array[i]);
2642 }
2643 }
2644 else
2645 string = g_strdup ("");
2646
2647 return string;
2648 }
2649
2650 /**
2651 * g_strjoin:
2652 * @separator: (nullable): a string to insert between each of the
2653 * strings, or %NULL
2654 * @...: a %NULL-terminated list of strings to join
2655 *
2656 * Joins a number of strings together to form one long string, with the
2657 * optional @separator inserted between each of them. The returned string
2658 * should be freed with g_free().
2659 *
2660 * Returns: a newly-allocated string containing all of the strings joined
2661 * together, with @separator between them
2662 */
2663 gchar*
g_strjoin(const gchar * separator,...)2664 g_strjoin (const gchar *separator,
2665 ...)
2666 {
2667 gchar *string, *s;
2668 va_list args;
2669 gsize len;
2670 gsize separator_len;
2671 gchar *ptr;
2672
2673 if (separator == NULL)
2674 separator = "";
2675
2676 separator_len = strlen (separator);
2677
2678 va_start (args, separator);
2679
2680 s = va_arg (args, gchar*);
2681
2682 if (s)
2683 {
2684 /* First part, getting length */
2685 len = 1 + strlen (s);
2686
2687 s = va_arg (args, gchar*);
2688 while (s)
2689 {
2690 len += separator_len + strlen (s);
2691 s = va_arg (args, gchar*);
2692 }
2693 va_end (args);
2694
2695 /* Second part, building string */
2696 string = g_new (gchar, len);
2697
2698 va_start (args, separator);
2699
2700 s = va_arg (args, gchar*);
2701 ptr = g_stpcpy (string, s);
2702
2703 s = va_arg (args, gchar*);
2704 while (s)
2705 {
2706 ptr = g_stpcpy (ptr, separator);
2707 ptr = g_stpcpy (ptr, s);
2708 s = va_arg (args, gchar*);
2709 }
2710 }
2711 else
2712 string = g_strdup ("");
2713
2714 va_end (args);
2715
2716 return string;
2717 }
2718
2719
2720 /**
2721 * g_strstr_len:
2722 * @haystack: a string
2723 * @haystack_len: the maximum length of @haystack. Note that -1 is
2724 * a valid length, if @haystack is nul-terminated, meaning it will
2725 * search through the whole string.
2726 * @needle: the string to search for
2727 *
2728 * Searches the string @haystack for the first occurrence
2729 * of the string @needle, limiting the length of the search
2730 * to @haystack_len.
2731 *
2732 * Returns: a pointer to the found occurrence, or
2733 * %NULL if not found.
2734 */
2735 gchar *
g_strstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2736 g_strstr_len (const gchar *haystack,
2737 gssize haystack_len,
2738 const gchar *needle)
2739 {
2740 g_return_val_if_fail (haystack != NULL, NULL);
2741 g_return_val_if_fail (needle != NULL, NULL);
2742
2743 if (haystack_len < 0)
2744 return strstr (haystack, needle);
2745 else
2746 {
2747 const gchar *p = haystack;
2748 gsize needle_len = strlen (needle);
2749 gsize haystack_len_unsigned = haystack_len;
2750 const gchar *end;
2751 gsize i;
2752
2753 if (needle_len == 0)
2754 return (gchar *)haystack;
2755
2756 if (haystack_len_unsigned < needle_len)
2757 return NULL;
2758
2759 end = haystack + haystack_len - needle_len;
2760
2761 while (p <= end && *p)
2762 {
2763 for (i = 0; i < needle_len; i++)
2764 if (p[i] != needle[i])
2765 goto next;
2766
2767 return (gchar *)p;
2768
2769 next:
2770 p++;
2771 }
2772
2773 return NULL;
2774 }
2775 }
2776
2777 /**
2778 * g_strrstr:
2779 * @haystack: a nul-terminated string
2780 * @needle: the nul-terminated string to search for
2781 *
2782 * Searches the string @haystack for the last occurrence
2783 * of the string @needle.
2784 *
2785 * Returns: a pointer to the found occurrence, or
2786 * %NULL if not found.
2787 */
2788 gchar *
g_strrstr(const gchar * haystack,const gchar * needle)2789 g_strrstr (const gchar *haystack,
2790 const gchar *needle)
2791 {
2792 gsize i;
2793 gsize needle_len;
2794 gsize haystack_len;
2795 const gchar *p;
2796
2797 g_return_val_if_fail (haystack != NULL, NULL);
2798 g_return_val_if_fail (needle != NULL, NULL);
2799
2800 needle_len = strlen (needle);
2801 haystack_len = strlen (haystack);
2802
2803 if (needle_len == 0)
2804 return (gchar *)haystack;
2805
2806 if (haystack_len < needle_len)
2807 return NULL;
2808
2809 p = haystack + haystack_len - needle_len;
2810
2811 while (p >= haystack)
2812 {
2813 for (i = 0; i < needle_len; i++)
2814 if (p[i] != needle[i])
2815 goto next;
2816
2817 return (gchar *)p;
2818
2819 next:
2820 p--;
2821 }
2822
2823 return NULL;
2824 }
2825
2826 /**
2827 * g_strrstr_len:
2828 * @haystack: a nul-terminated string
2829 * @haystack_len: the maximum length of @haystack
2830 * @needle: the nul-terminated string to search for
2831 *
2832 * Searches the string @haystack for the last occurrence
2833 * of the string @needle, limiting the length of the search
2834 * to @haystack_len.
2835 *
2836 * Returns: a pointer to the found occurrence, or
2837 * %NULL if not found.
2838 */
2839 gchar *
g_strrstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2840 g_strrstr_len (const gchar *haystack,
2841 gssize haystack_len,
2842 const gchar *needle)
2843 {
2844 g_return_val_if_fail (haystack != NULL, NULL);
2845 g_return_val_if_fail (needle != NULL, NULL);
2846
2847 if (haystack_len < 0)
2848 return g_strrstr (haystack, needle);
2849 else
2850 {
2851 gsize needle_len = strlen (needle);
2852 const gchar *haystack_max = haystack + haystack_len;
2853 const gchar *p = haystack;
2854 gsize i;
2855
2856 while (p < haystack_max && *p)
2857 p++;
2858
2859 if (p < haystack + needle_len)
2860 return NULL;
2861
2862 p -= needle_len;
2863
2864 while (p >= haystack)
2865 {
2866 for (i = 0; i < needle_len; i++)
2867 if (p[i] != needle[i])
2868 goto next;
2869
2870 return (gchar *)p;
2871
2872 next:
2873 p--;
2874 }
2875
2876 return NULL;
2877 }
2878 }
2879
2880
2881 /**
2882 * g_str_has_suffix:
2883 * @str: a nul-terminated string
2884 * @suffix: the nul-terminated suffix to look for
2885 *
2886 * Looks whether the string @str ends with @suffix.
2887 *
 * Returns: %TRUE if @str ends with @suffix, %FALSE otherwise.
2889 *
2890 * Since: 2.2
2891 */
2892 gboolean
g_str_has_suffix(const gchar * str,const gchar * suffix)2893 g_str_has_suffix (const gchar *str,
2894 const gchar *suffix)
2895 {
2896 gsize str_len;
2897 gsize suffix_len;
2898
2899 g_return_val_if_fail (str != NULL, FALSE);
2900 g_return_val_if_fail (suffix != NULL, FALSE);
2901
2902 str_len = strlen (str);
2903 suffix_len = strlen (suffix);
2904
2905 if (str_len < suffix_len)
2906 return FALSE;
2907
2908 return strcmp (str + str_len - suffix_len, suffix) == 0;
2909 }
2910
2911 /**
2912 * g_str_has_prefix:
2913 * @str: a nul-terminated string
2914 * @prefix: the nul-terminated prefix to look for
2915 *
2916 * Looks whether the string @str begins with @prefix.
2917 *
2918 * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2919 *
2920 * Since: 2.2
2921 */
2922 gboolean
g_str_has_prefix(const gchar * str,const gchar * prefix)2923 g_str_has_prefix (const gchar *str,
2924 const gchar *prefix)
2925 {
2926 g_return_val_if_fail (str != NULL, FALSE);
2927 g_return_val_if_fail (prefix != NULL, FALSE);
2928
2929 return strncmp (str, prefix, strlen (prefix)) == 0;
2930 }
2931
2932 /**
2933 * g_strv_length:
2934 * @str_array: a %NULL-terminated array of strings
2935 *
2936 * Returns the length of the given %NULL-terminated
2937 * string array @str_array. @str_array must not be %NULL.
2938 *
2939 * Returns: length of @str_array.
2940 *
2941 * Since: 2.6
2942 */
2943 guint
g_strv_length(gchar ** str_array)2944 g_strv_length (gchar **str_array)
2945 {
2946 guint i = 0;
2947
2948 g_return_val_if_fail (str_array != NULL, 0);
2949
2950 while (str_array[i])
2951 ++i;
2952
2953 return i;
2954 }
2955
2956 static void
index_add_folded(GPtrArray * array,const gchar * start,const gchar * end)2957 index_add_folded (GPtrArray *array,
2958 const gchar *start,
2959 const gchar *end)
2960 {
2961 gchar *normal;
2962
2963 normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2964
2965 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
2966 if (strstr (normal, "ı") || strstr (normal, "İ"))
2967 {
2968 gchar *s = normal;
2969 GString *tmp;
2970
2971 tmp = g_string_new (NULL);
2972
2973 while (*s)
2974 {
2975 gchar *i, *I, *e;
2976
2977 i = strstr (s, "ı");
2978 I = strstr (s, "İ");
2979
2980 if (!i && !I)
2981 break;
2982 else if (i && !I)
2983 e = i;
2984 else if (I && !i)
2985 e = I;
2986 else if (i < I)
2987 e = i;
2988 else
2989 e = I;
2990
2991 g_string_append_len (tmp, s, e - s);
2992 g_string_append_c (tmp, 'i');
2993 s = g_utf8_next_char (e);
2994 }
2995
2996 g_string_append (tmp, s);
2997 g_free (normal);
2998 normal = g_string_free (tmp, FALSE);
2999 }
3000
3001 g_ptr_array_add (array, g_utf8_casefold (normal, -1));
3002 g_free (normal);
3003 }
3004
3005 static gchar **
split_words(const gchar * value)3006 split_words (const gchar *value)
3007 {
3008 const gchar *start = NULL;
3009 GPtrArray *result;
3010 const gchar *s;
3011
3012 result = g_ptr_array_new ();
3013
3014 for (s = value; *s; s = g_utf8_next_char (s))
3015 {
3016 gunichar c = g_utf8_get_char (s);
3017
3018 if (start == NULL)
3019 {
3020 if (g_unichar_isalnum (c) || g_unichar_ismark (c))
3021 start = s;
3022 }
3023 else
3024 {
3025 if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
3026 {
3027 index_add_folded (result, start, s);
3028 start = NULL;
3029 }
3030 }
3031 }
3032
3033 if (start)
3034 index_add_folded (result, start, s);
3035
3036 g_ptr_array_add (result, NULL);
3037
3038 return (gchar **) g_ptr_array_free (result, FALSE);
3039 }
3040
3041 /**
3042 * g_str_tokenize_and_fold:
3043 * @string: a string
3044 * @translit_locale: (nullable): the language code (like 'de' or
3045 * 'en_GB') from which @string originates
3046 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
3047 * return location for ASCII alternates
3048 *
3049 * Tokenises @string and performs folding on each token.
3050 *
3051 * A token is a non-empty sequence of alphanumeric characters in the
3052 * source string, separated by non-alphanumeric characters. An
3053 * "alphanumeric" character for this purpose is one that matches
3054 * g_unichar_isalnum() or g_unichar_ismark().
3055 *
3056 * Each token is then (Unicode) normalised and case-folded. If
3057 * @ascii_alternates is non-%NULL and some of the returned tokens
3058 * contain non-ASCII characters, ASCII alternatives will be generated.
3059 *
3060 * The number of ASCII alternatives that are generated and the method
3061 * for doing so is unspecified, but @translit_locale (if specified) may
3062 * improve the transliteration if the language of the source string is
3063 * known.
3064 *
3065 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3066 *
3067 * Since: 2.40
3068 **/
3069 gchar **
g_str_tokenize_and_fold(const gchar * string,const gchar * translit_locale,gchar *** ascii_alternates)3070 g_str_tokenize_and_fold (const gchar *string,
3071 const gchar *translit_locale,
3072 gchar ***ascii_alternates)
3073 {
3074 gchar **result;
3075
3076 g_return_val_if_fail (string != NULL, NULL);
3077
3078 if (ascii_alternates && g_str_is_ascii (string))
3079 {
3080 *ascii_alternates = g_new0 (gchar *, 0 + 1);
3081 ascii_alternates = NULL;
3082 }
3083
3084 result = split_words (string);
3085
3086 if (ascii_alternates)
3087 {
3088 gint i, j, n;
3089
3090 n = g_strv_length (result);
3091 *ascii_alternates = g_new (gchar *, n + 1);
3092 j = 0;
3093
3094 for (i = 0; i < n; i++)
3095 {
3096 if (!g_str_is_ascii (result[i]))
3097 {
3098 gchar *composed;
3099 gchar *ascii;
3100 gint k;
3101
3102 composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3103
3104 ascii = g_str_to_ascii (composed, translit_locale);
3105
3106 /* Only accept strings that are now entirely alnums */
3107 for (k = 0; ascii[k]; k++)
3108 if (!g_ascii_isalnum (ascii[k]))
3109 break;
3110
3111 if (ascii[k] == '\0')
3112 /* Made it to the end... */
3113 (*ascii_alternates)[j++] = ascii;
3114 else
3115 g_free (ascii);
3116
3117 g_free (composed);
3118 }
3119 }
3120
3121 (*ascii_alternates)[j] = NULL;
3122 }
3123
3124 return result;
3125 }
3126
3127 /**
3128 * g_str_match_string:
3129 * @search_term: the search term from the user
3130 * @potential_hit: the text that may be a hit
3131 * @accept_alternates: %TRUE to accept ASCII alternates
3132 *
3133 * Checks if a search conducted for @search_term should match
3134 * @potential_hit.
3135 *
3136 * This function calls g_str_tokenize_and_fold() on both
3137 * @search_term and @potential_hit. ASCII alternates are never taken
3138 * for @search_term but will be taken for @potential_hit according to
3139 * the value of @accept_alternates.
3140 *
3141 * A hit occurs when each folded token in @search_term is a prefix of a
3142 * folded token from @potential_hit.
3143 *
3144 * Depending on how you're performing the search, it will typically be
3145 * faster to call g_str_tokenize_and_fold() on each string in
3146 * your corpus and build an index on the returned folded tokens, then
3147 * call g_str_tokenize_and_fold() on the search term and
3148 * perform lookups into that index.
3149 *
3150 * As some examples, searching for ‘fred’ would match the potential hit
3151 * ‘Smith, Fred’ and also ‘Frédéric’. Searching for ‘Fréd’ would match
3152 * ‘Frédéric’ but not ‘Frederic’ (due to the one-directional nature of
3153 * accent matching). Searching ‘fo’ would match ‘Foo’ and ‘Bar Foo
3154 * Baz’, but not ‘SFO’ (because no word has ‘fo’ as a prefix).
3155 *
3156 * Returns: %TRUE if @potential_hit is a hit
3157 *
3158 * Since: 2.40
3159 **/
3160 gboolean
g_str_match_string(const gchar * search_term,const gchar * potential_hit,gboolean accept_alternates)3161 g_str_match_string (const gchar *search_term,
3162 const gchar *potential_hit,
3163 gboolean accept_alternates)
3164 {
3165 gchar **alternates = NULL;
3166 gchar **term_tokens;
3167 gchar **hit_tokens;
3168 gboolean matched;
3169 gint i, j;
3170
3171 g_return_val_if_fail (search_term != NULL, FALSE);
3172 g_return_val_if_fail (potential_hit != NULL, FALSE);
3173
3174 term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3175 hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3176
3177 matched = TRUE;
3178
3179 for (i = 0; term_tokens[i]; i++)
3180 {
3181 for (j = 0; hit_tokens[j]; j++)
3182 if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3183 goto one_matched;
3184
3185 if (accept_alternates)
3186 for (j = 0; alternates[j]; j++)
3187 if (g_str_has_prefix (alternates[j], term_tokens[i]))
3188 goto one_matched;
3189
3190 matched = FALSE;
3191 break;
3192
3193 one_matched:
3194 continue;
3195 }
3196
3197 g_strfreev (term_tokens);
3198 g_strfreev (hit_tokens);
3199 g_strfreev (alternates);
3200
3201 return matched;
3202 }
3203
3204 /**
3205 * g_strv_contains:
3206 * @strv: a %NULL-terminated array of strings
3207 * @str: a string
3208 *
3209 * Checks if @strv contains @str. @strv must not be %NULL.
3210 *
3211 * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3212 *
3213 * Since: 2.44
3214 */
3215 gboolean
g_strv_contains(const gchar * const * strv,const gchar * str)3216 g_strv_contains (const gchar * const *strv,
3217 const gchar *str)
3218 {
3219 g_return_val_if_fail (strv != NULL, FALSE);
3220 g_return_val_if_fail (str != NULL, FALSE);
3221
3222 for (; *strv != NULL; strv++)
3223 {
3224 if (g_str_equal (str, *strv))
3225 return TRUE;
3226 }
3227
3228 return FALSE;
3229 }
3230
3231 /**
3232 * g_strv_equal:
3233 * @strv1: a %NULL-terminated array of strings
3234 * @strv2: another %NULL-terminated array of strings
3235 *
3236 * Checks if @strv1 and @strv2 contain exactly the same elements in exactly the
3237 * same order. Elements are compared using g_str_equal(). To match independently
3238 * of order, sort the arrays first (using g_qsort_with_data() or similar).
3239 *
 * Two empty arrays are considered equal. Neither @strv1 nor @strv2 may be
 * %NULL.
3242 *
3243 * Returns: %TRUE if @strv1 and @strv2 are equal
3244 * Since: 2.60
3245 */
3246 gboolean
g_strv_equal(const gchar * const * strv1,const gchar * const * strv2)3247 g_strv_equal (const gchar * const *strv1,
3248 const gchar * const *strv2)
3249 {
3250 g_return_val_if_fail (strv1 != NULL, FALSE);
3251 g_return_val_if_fail (strv2 != NULL, FALSE);
3252
3253 if (strv1 == strv2)
3254 return TRUE;
3255
3256 for (; *strv1 != NULL && *strv2 != NULL; strv1++, strv2++)
3257 {
3258 if (!g_str_equal (*strv1, *strv2))
3259 return FALSE;
3260 }
3261
3262 return (*strv1 == NULL && *strv2 == NULL);
3263 }
3264
3265 static gboolean
str_has_sign(const gchar * str)3266 str_has_sign (const gchar *str)
3267 {
3268 return str[0] == '-' || str[0] == '+';
3269 }
3270
3271 static gboolean
str_has_hex_prefix(const gchar * str)3272 str_has_hex_prefix (const gchar *str)
3273 {
3274 return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3275 }
3276
3277 /**
3278 * g_ascii_string_to_signed:
3279 * @str: a string
3280 * @base: base of a parsed number
3281 * @min: a lower bound (inclusive)
3282 * @max: an upper bound (inclusive)
3283 * @out_num: (out) (optional): a return location for a number
3284 * @error: a return location for #GError
3285 *
3286 * A convenience function for converting a string to a signed number.
3287 *
3288 * This function assumes that @str contains only a number of the given
3289 * @base that is within inclusive bounds limited by @min and @max. If
3290 * this is true, then the converted number is stored in @out_num. An
3291 * empty string is not a valid input. A string with leading or
3292 * trailing whitespace is also an invalid input.
3293 *
3294 * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3295 * not be prefixed with "0x" or "0X". Such a problem does not exist
3296 * for octal numbers, since they were usually prefixed with a zero
3297 * which does not change the value of the parsed number.
3298 *
3299 * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3300 * domain. If the input is invalid, the error code will be
3301 * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3302 * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3303 *
3304 * See g_ascii_strtoll() if you have more complex needs such as
3305 * parsing a string which starts with a number, but then has other
3306 * characters.
3307 *
3308 * Returns: %TRUE if @str was a number, otherwise %FALSE.
3309 *
3310 * Since: 2.54
3311 */
3312 gboolean
g_ascii_string_to_signed(const gchar * str,guint base,gint64 min,gint64 max,gint64 * out_num,GError ** error)3313 g_ascii_string_to_signed (const gchar *str,
3314 guint base,
3315 gint64 min,
3316 gint64 max,
3317 gint64 *out_num,
3318 GError **error)
3319 {
3320 gint64 number;
3321 const gchar *end_ptr = NULL;
3322 gint saved_errno = 0;
3323
3324 g_return_val_if_fail (str != NULL, FALSE);
3325 g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3326 g_return_val_if_fail (min <= max, FALSE);
3327 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3328
3329 if (str[0] == '\0')
3330 {
3331 g_set_error_literal (error,
3332 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3333 _("Empty string is not a number"));
3334 return FALSE;
3335 }
3336
3337 errno = 0;
3338 number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3339 saved_errno = errno;
3340
3341 if (/* We do not allow leading whitespace, but g_ascii_strtoll
3342 * accepts it and just skips it, so we need to check for it
3343 * ourselves.
3344 */
3345 g_ascii_isspace (str[0]) ||
3346 /* We don't support hexadecimal numbers prefixed with 0x or
3347 * 0X.
3348 */
3349 (base == 16 &&
3350 (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3351 (saved_errno != 0 && saved_errno != ERANGE) ||
3352 end_ptr == NULL ||
3353 *end_ptr != '\0')
3354 {
3355 g_set_error (error,
3356 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3357 _("“%s” is not a signed number"), str);
3358 return FALSE;
3359 }
3360 if (saved_errno == ERANGE || number < min || number > max)
3361 {
3362 gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3363 gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3364
3365 g_set_error (error,
3366 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3367 _("Number “%s” is out of bounds [%s, %s]"),
3368 str, min_str, max_str);
3369 g_free (min_str);
3370 g_free (max_str);
3371 return FALSE;
3372 }
3373 if (out_num != NULL)
3374 *out_num = number;
3375 return TRUE;
3376 }
3377
3378 /**
3379 * g_ascii_string_to_unsigned:
3380 * @str: a string
3381 * @base: base of a parsed number
3382 * @min: a lower bound (inclusive)
3383 * @max: an upper bound (inclusive)
3384 * @out_num: (out) (optional): a return location for a number
3385 * @error: a return location for #GError
3386 *
3387 * A convenience function for converting a string to an unsigned number.
3388 *
3389 * This function assumes that @str contains only a number of the given
3390 * @base that is within inclusive bounds limited by @min and @max. If
3391 * this is true, then the converted number is stored in @out_num. An
3392 * empty string is not a valid input. A string with leading or
3393 * trailing whitespace is also an invalid input. A string with a leading sign
3394 * (`-` or `+`) is not a valid input for the unsigned parser.
3395 *
3396 * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3397 * not be prefixed with "0x" or "0X". Such a problem does not exist
3398 * for octal numbers, since they were usually prefixed with a zero
3399 * which does not change the value of the parsed number.
3400 *
3401 * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3402 * domain. If the input is invalid, the error code will be
3403 * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3404 * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3405 *
3406 * See g_ascii_strtoull() if you have more complex needs such as
3407 * parsing a string which starts with a number, but then has other
3408 * characters.
3409 *
3410 * Returns: %TRUE if @str was a number, otherwise %FALSE.
3411 *
3412 * Since: 2.54
3413 */
3414 gboolean
g_ascii_string_to_unsigned(const gchar * str,guint base,guint64 min,guint64 max,guint64 * out_num,GError ** error)3415 g_ascii_string_to_unsigned (const gchar *str,
3416 guint base,
3417 guint64 min,
3418 guint64 max,
3419 guint64 *out_num,
3420 GError **error)
3421 {
3422 guint64 number;
3423 const gchar *end_ptr = NULL;
3424 gint saved_errno = 0;
3425
3426 g_return_val_if_fail (str != NULL, FALSE);
3427 g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3428 g_return_val_if_fail (min <= max, FALSE);
3429 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3430
3431 if (str[0] == '\0')
3432 {
3433 g_set_error_literal (error,
3434 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3435 _("Empty string is not a number"));
3436 return FALSE;
3437 }
3438
3439 errno = 0;
3440 number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3441 saved_errno = errno;
3442
3443 if (/* We do not allow leading whitespace, but g_ascii_strtoull
3444 * accepts it and just skips it, so we need to check for it
3445 * ourselves.
3446 */
3447 g_ascii_isspace (str[0]) ||
3448 /* Unsigned number should have no sign.
3449 */
3450 str_has_sign (str) ||
3451 /* We don't support hexadecimal numbers prefixed with 0x or
3452 * 0X.
3453 */
3454 (base == 16 && str_has_hex_prefix (str)) ||
3455 (saved_errno != 0 && saved_errno != ERANGE) ||
3456 end_ptr == NULL ||
3457 *end_ptr != '\0')
3458 {
3459 g_set_error (error,
3460 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3461 _("“%s” is not an unsigned number"), str);
3462 return FALSE;
3463 }
3464 if (saved_errno == ERANGE || number < min || number > max)
3465 {
3466 gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3467 gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3468
3469 g_set_error (error,
3470 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3471 _("Number “%s” is out of bounds [%s, %s]"),
3472 str, min_str, max_str);
3473 g_free (min_str);
3474 g_free (max_str);
3475 return FALSE;
3476 }
3477 if (out_num != NULL)
3478 *out_num = number;
3479 return TRUE;
3480 }
3481
/* Per the GLib documentation for G_DEFINE_QUARK, this expands to the
 * definition of g_number_parser_error_quark(), a function returning the
 * GQuark for the string "g-number-parser-error-quark".
 * NOTE(review): presumably this is what the G_NUMBER_PARSER_ERROR domain
 * used by the number parsers in this file resolves to; that macro is
 * declared outside this chunk, so confirm against the header.
 */
3482 G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)
3483