• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GLIB - Library of useful routines for C programming
2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 /*
19  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
20  * file for a list of people on the GLib Team.  See the ChangeLog
21  * files for a list of changes.  These files are distributed with
22  * GLib at ftp://ftp.gtk.org/pub/gtk/.
23  */
24 
25 /*
26  * MT safe
27  */
28 
29 #include "config.h"
30 
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <locale.h>
35 #include <string.h>
36 #include <locale.h>
37 #include <errno.h>
38 #include <ctype.h>              /* For tolower() */
39 
40 #ifdef HAVE_XLOCALE_H
41 /* Needed on BSD/OS X for e.g. strtod_l */
42 #include <xlocale.h>
43 #endif
44 
45 #ifdef G_OS_WIN32
46 #include <windows.h>
47 #endif
48 
49 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
50 
51 #include "gstrfuncs.h"
52 
53 #include "gprintf.h"
54 #include "gprintfint.h"
55 #include "glibintl.h"
56 
57 
/**
 * SECTION:string_utils
 * @title: String Utility Functions
 * @short_description: various string-related functions
 *
 * This section describes a number of utility functions for creating,
 * duplicating, and manipulating strings.
 *
 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
 * g_vprintf(), g_vfprintf(), g_vsprintf() and g_vasprintf()
 * are declared in the header `gprintf.h` which is not included in `glib.h`
 * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
 * explicitly include `<glib/gprintf.h>` in order to use the GLib
 * printf() functions.
 *
 * ## String precision pitfalls # {#string-precision}
 *
 * While you may use the printf() functions to format UTF-8 strings,
 * notice that the precision of a \%Ns parameter is interpreted
 * as the number of bytes, not characters to print. On top of that,
 * the GNU libc implementation of the printf() functions has the
 * "feature" that it checks that the string given for the \%Ns
 * parameter consists of a whole number of characters in the current
 * encoding. So, unless you are sure you are always going to be in a
 * UTF-8 locale or you know your text is restricted to ASCII, avoid
 * using \%Ns. If your intention is to format strings for a
 * certain number of columns, then \%Ns is not a correct solution
 * anyway, since it fails to take wide characters (see g_unichar_iswide())
 * into account.
 *
 * Note also that there are various printf() parameters which are platform
 * dependent. GLib provides platform independent macros for these parameters
 * which should be used instead. A common example is %G_GUINT64_FORMAT, which
 * should be used instead of `%llu` or similar parameters for formatting
 * 64-bit integers. These macros are all named `G_*_FORMAT`; see
 * [Basic Types][glib-Basic-Types].
 */
96 /**
97  * g_ascii_isalnum:
98  * @c: any character
99  *
100  * Determines whether a character is alphanumeric.
101  *
102  * Unlike the standard C library isalnum() function, this only
103  * recognizes standard ASCII letters and ignores the locale,
104  * returning %FALSE for all non-ASCII characters. Also, unlike
105  * the standard library function, this takes a char, not an int,
106  * so don't call it on %EOF, but no need to cast to #guchar before
107  * passing a possibly non-ASCII character in.
108  *
109  * Returns: %TRUE if @c is an ASCII alphanumeric character
110  */
111 
112 /**
113  * g_ascii_isalpha:
114  * @c: any character
115  *
116  * Determines whether a character is alphabetic (i.e. a letter).
117  *
118  * Unlike the standard C library isalpha() function, this only
119  * recognizes standard ASCII letters and ignores the locale,
120  * returning %FALSE for all non-ASCII characters. Also, unlike
121  * the standard library function, this takes a char, not an int,
122  * so don't call it on %EOF, but no need to cast to #guchar before
123  * passing a possibly non-ASCII character in.
124  *
125  * Returns: %TRUE if @c is an ASCII alphabetic character
126  */
127 
128 /**
129  * g_ascii_iscntrl:
130  * @c: any character
131  *
132  * Determines whether a character is a control character.
133  *
134  * Unlike the standard C library iscntrl() function, this only
135  * recognizes standard ASCII control characters and ignores the
136  * locale, returning %FALSE for all non-ASCII characters. Also,
137  * unlike the standard library function, this takes a char, not
138  * an int, so don't call it on %EOF, but no need to cast to #guchar
139  * before passing a possibly non-ASCII character in.
140  *
141  * Returns: %TRUE if @c is an ASCII control character.
142  */
143 
/**
 * g_ascii_isdigit:
 * @c: any character
 *
 * Determines whether a character is a digit (0-9).
 *
 * Unlike the standard C library isdigit() function, this takes
 * a char, not an int, so don't call it on %EOF, but no need to
 * cast to #guchar before passing a possibly non-ASCII character in.
 *
 * Returns: %TRUE if @c is an ASCII digit.
 */
156 
157 /**
158  * g_ascii_isgraph:
159  * @c: any character
160  *
161  * Determines whether a character is a printing character and not a space.
162  *
163  * Unlike the standard C library isgraph() function, this only
164  * recognizes standard ASCII characters and ignores the locale,
165  * returning %FALSE for all non-ASCII characters. Also, unlike
166  * the standard library function, this takes a char, not an int,
167  * so don't call it on %EOF, but no need to cast to #guchar before
168  * passing a possibly non-ASCII character in.
169  *
170  * Returns: %TRUE if @c is an ASCII printing character other than space.
171  */
172 
173 /**
174  * g_ascii_islower:
175  * @c: any character
176  *
177  * Determines whether a character is an ASCII lower case letter.
178  *
179  * Unlike the standard C library islower() function, this only
180  * recognizes standard ASCII letters and ignores the locale,
181  * returning %FALSE for all non-ASCII characters. Also, unlike
182  * the standard library function, this takes a char, not an int,
183  * so don't call it on %EOF, but no need to worry about casting
184  * to #guchar before passing a possibly non-ASCII character in.
185  *
186  * Returns: %TRUE if @c is an ASCII lower case letter
187  */
188 
189 /**
190  * g_ascii_isprint:
191  * @c: any character
192  *
193  * Determines whether a character is a printing character.
194  *
195  * Unlike the standard C library isprint() function, this only
196  * recognizes standard ASCII characters and ignores the locale,
197  * returning %FALSE for all non-ASCII characters. Also, unlike
198  * the standard library function, this takes a char, not an int,
199  * so don't call it on %EOF, but no need to cast to #guchar before
200  * passing a possibly non-ASCII character in.
201  *
202  * Returns: %TRUE if @c is an ASCII printing character.
203  */
204 
/**
 * g_ascii_ispunct:
 * @c: any character
 *
 * Determines whether a character is a punctuation character.
 *
 * Unlike the standard C library ispunct() function, this only
 * recognizes standard ASCII punctuation characters and ignores the
 * locale, returning %FALSE for all non-ASCII characters. Also, unlike
 * the standard library function, this takes a char, not an int,
 * so don't call it on %EOF, but no need to cast to #guchar before
 * passing a possibly non-ASCII character in.
 *
 * Returns: %TRUE if @c is an ASCII punctuation character.
 */
220 
221 /**
222  * g_ascii_isspace:
223  * @c: any character
224  *
225  * Determines whether a character is a white-space character.
226  *
227  * Unlike the standard C library isspace() function, this only
228  * recognizes standard ASCII white-space and ignores the locale,
229  * returning %FALSE for all non-ASCII characters. Also, unlike
230  * the standard library function, this takes a char, not an int,
231  * so don't call it on %EOF, but no need to cast to #guchar before
232  * passing a possibly non-ASCII character in.
233  *
234  * Returns: %TRUE if @c is an ASCII white-space character
235  */
236 
237 /**
238  * g_ascii_isupper:
239  * @c: any character
240  *
241  * Determines whether a character is an ASCII upper case letter.
242  *
243  * Unlike the standard C library isupper() function, this only
244  * recognizes standard ASCII letters and ignores the locale,
245  * returning %FALSE for all non-ASCII characters. Also, unlike
246  * the standard library function, this takes a char, not an int,
247  * so don't call it on %EOF, but no need to worry about casting
248  * to #guchar before passing a possibly non-ASCII character in.
249  *
250  * Returns: %TRUE if @c is an ASCII upper case letter
251  */
252 
253 /**
254  * g_ascii_isxdigit:
255  * @c: any character
256  *
257  * Determines whether a character is a hexadecimal-digit character.
258  *
259  * Unlike the standard C library isxdigit() function, this takes
260  * a char, not an int, so don't call it on %EOF, but no need to
261  * cast to #guchar before passing a possibly non-ASCII character in.
262  *
263  * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
264  */
265 
266 /**
267  * G_ASCII_DTOSTR_BUF_SIZE:
268  *
269  * A good size for a buffer to be passed into g_ascii_dtostr().
270  * It is guaranteed to be enough for all output of that function
271  * on systems with 64bit IEEE-compatible doubles.
272  *
273  * The typical usage would be something like:
274  * |[<!-- language="C" -->
275  *   char buf[G_ASCII_DTOSTR_BUF_SIZE];
276  *
277  *   fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
278  * ]|
279  */
280 
281 /**
282  * g_strstrip:
283  * @string: a string to remove the leading and trailing whitespace from
284  *
285  * Removes leading and trailing whitespace from a string.
286  * See g_strchomp() and g_strchug().
287  *
288  * Returns: @string
289  */
290 
291 /**
292  * G_STR_DELIMITERS:
293  *
294  * The standard delimiters, used in g_strdelimit().
295  */
296 
/* Per-character classification flags, indexed by byte value.
 *
 * Only the 128 ASCII entries are written out below; the remaining 128
 * elements of the 256-entry array have no initializer and are therefore
 * zero.
 *
 * NOTE(review): the meaning of the individual flag bits is not visible in
 * this file; they presumably correspond to the flag enum tested by the
 * g_ascii_is*() macros in gstrfuncs.h -- confirm there before changing
 * any value.
 */
static const guint16 ascii_table_data[256] = {
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x00 - 0x07 */
  0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004, /* 0x08 - 0x0f */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x10 - 0x17 */
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, /* 0x18 - 0x1f */
  0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x20 - 0x27 */
  0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x28 - 0x2f */
  0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, /* 0x30 - 0x37 */
  0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x38 - 0x3f */
  0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253, /* 0x40 - 0x47 */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, /* 0x48 - 0x4f */
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, /* 0x50 - 0x57 */
  0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, /* 0x58 - 0x5f */
  0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073, /* 0x60 - 0x67 */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, /* 0x68 - 0x6f */
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, /* 0x70 - 0x77 */
  0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004  /* 0x78 - 0x7f */
  /* the upper 128 are all zeroes */
};

/* Non-static, read-only pointer through which the table above is exposed. */
const guint16 * const g_ascii_table = ascii_table_data;
318 
/* The explicit-locale code paths are enabled only when every one of the
 * feature macros below is defined.
 */
#if defined (HAVE_NEWLOCALE) && \
    defined (HAVE_USELOCALE) && \
    defined (HAVE_STRTOD_L) && \
    defined (HAVE_STRTOULL_L) && \
    defined (HAVE_STRTOLL_L)
#define USE_XLOCALE 1
#endif

#ifdef USE_XLOCALE
/* Returns the shared "C" locale object, creating it with newlocale() the
 * first time it is requested.  Creation is guarded by
 * g_once_init_enter()/g_once_init_leave().  The result of newlocale() is
 * stored and returned as-is, without a failure check.
 */
static locale_t
get_C_locale (void)
{
  static locale_t c_locale = NULL;
  static gsize c_locale_ready = FALSE;

  if (!g_once_init_enter (&c_locale_ready))
    return c_locale;

  c_locale = newlocale (LC_ALL_MASK, "C", NULL);
  g_once_init_leave (&c_locale_ready, TRUE);

  return c_locale;
}
#endif
343 
344 /**
345  * g_strdup:
346  * @str: (nullable): the string to duplicate
347  *
348  * Duplicates a string. If @str is %NULL it returns %NULL.
349  * The returned string should be freed with g_free()
350  * when no longer needed.
351  *
352  * Returns: a newly-allocated copy of @str
353  */
354 gchar*
g_strdup(const gchar * str)355 g_strdup (const gchar *str)
356 {
357   gchar *new_str;
358   gsize length;
359 
360   if (str)
361     {
362       length = strlen (str) + 1;
363       new_str = g_new (char, length);
364       memcpy (new_str, str, length);
365     }
366   else
367     new_str = NULL;
368 
369   return new_str;
370 }
371 
372 /**
373  * g_memdup:
374  * @mem: the memory to copy.
375  * @byte_size: the number of bytes to copy.
376  *
377  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
378  * from @mem. If @mem is %NULL it returns %NULL.
379  *
380  * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if @mem
381  *  is %NULL.
382  */
383 gpointer
g_memdup(gconstpointer mem,guint byte_size)384 g_memdup (gconstpointer mem,
385           guint         byte_size)
386 {
387   gpointer new_mem;
388 
389   if (mem && byte_size != 0)
390     {
391       new_mem = g_malloc (byte_size);
392       memcpy (new_mem, mem, byte_size);
393     }
394   else
395     new_mem = NULL;
396 
397   return new_mem;
398 }
399 
400 /**
401  * g_memdup2:
402  * @mem: (nullable): the memory to copy.
403  * @byte_size: the number of bytes to copy.
404  *
405  * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
406  * from @mem. If @mem is %NULL it returns %NULL.
407  *
408  * This replaces g_memdup(), which was prone to integer overflows when
409  * converting the argument from a #gsize to a #guint.
410  *
411  * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
412  *    or %NULL if @mem is %NULL.
413  * Since: 2.68
414  */
415 gpointer
g_memdup2(gconstpointer mem,gsize byte_size)416 g_memdup2 (gconstpointer mem,
417            gsize         byte_size)
418 {
419   gpointer new_mem;
420 
421   if (mem && byte_size != 0)
422     {
423       new_mem = g_malloc (byte_size);
424       memcpy (new_mem, mem, byte_size);
425     }
426   else
427     new_mem = NULL;
428 
429   return new_mem;
430 }
431 
432 /**
433  * g_strndup:
434  * @str: the string to duplicate
435  * @n: the maximum number of bytes to copy from @str
436  *
437  * Duplicates the first @n bytes of a string, returning a newly-allocated
438  * buffer @n + 1 bytes long which will always be nul-terminated. If @str
439  * is less than @n bytes long the buffer is padded with nuls. If @str is
440  * %NULL it returns %NULL. The returned value should be freed when no longer
441  * needed.
442  *
443  * To copy a number of characters from a UTF-8 encoded string,
444  * use g_utf8_strncpy() instead.
445  *
446  * Returns: a newly-allocated buffer containing the first @n bytes
447  *     of @str, nul-terminated
448  */
449 gchar*
g_strndup(const gchar * str,gsize n)450 g_strndup (const gchar *str,
451            gsize        n)
452 {
453   gchar *new_str;
454 
455   if (str)
456     {
457       new_str = g_new (gchar, n + 1);
458       strncpy (new_str, str, n);
459       new_str[n] = '\0';
460     }
461   else
462     new_str = NULL;
463 
464   return new_str;
465 }
466 
467 /**
468  * g_strnfill:
469  * @length: the length of the new string
470  * @fill_char: the byte to fill the string with
471  *
472  * Creates a new string @length bytes long filled with @fill_char.
473  * The returned string should be freed when no longer needed.
474  *
475  * Returns: a newly-allocated string filled the @fill_char
476  */
477 gchar*
g_strnfill(gsize length,gchar fill_char)478 g_strnfill (gsize length,
479             gchar fill_char)
480 {
481   gchar *str;
482 
483   str = g_new (gchar, length + 1);
484   memset (str, (guchar)fill_char, length);
485   str[length] = '\0';
486 
487   return str;
488 }
489 
490 /**
491  * g_stpcpy:
492  * @dest: destination buffer.
493  * @src: source string.
494  *
495  * Copies a nul-terminated string into the dest buffer, include the
496  * trailing nul, and return a pointer to the trailing nul byte.
497  * This is useful for concatenating multiple strings together
498  * without having to repeatedly scan for the end.
499  *
500  * Returns: a pointer to trailing nul byte.
501  **/
502 gchar *
g_stpcpy(gchar * dest,const gchar * src)503 g_stpcpy (gchar       *dest,
504           const gchar *src)
505 {
506 #ifdef HAVE_STPCPY
507   g_return_val_if_fail (dest != NULL, NULL);
508   g_return_val_if_fail (src != NULL, NULL);
509   return stpcpy (dest, src);
510 #else
511   gchar *d = dest;
512   const gchar *s = src;
513 
514   g_return_val_if_fail (dest != NULL, NULL);
515   g_return_val_if_fail (src != NULL, NULL);
516   do
517     *d++ = *s;
518   while (*s++ != '\0');
519 
520   return d - 1;
521 #endif
522 }
523 
524 /**
525  * g_strdup_vprintf:
526  * @format: a standard printf() format string, but notice
527  *     [string precision pitfalls][string-precision]
528  * @args: the list of parameters to insert into the format string
529  *
530  * Similar to the standard C vsprintf() function but safer, since it
531  * calculates the maximum space required and allocates memory to hold
532  * the result. The returned string should be freed with g_free() when
533  * no longer needed.
534  *
535  * See also g_vasprintf(), which offers the same functionality, but
536  * additionally returns the length of the allocated string.
537  *
538  * Returns: a newly-allocated string holding the result
539  */
540 gchar*
g_strdup_vprintf(const gchar * format,va_list args)541 g_strdup_vprintf (const gchar *format,
542                   va_list      args)
543 {
544   gchar *string = NULL;
545 
546   g_vasprintf (&string, format, args);
547 
548   return string;
549 }
550 
551 /**
552  * g_strdup_printf:
553  * @format: a standard printf() format string, but notice
554  *     [string precision pitfalls][string-precision]
555  * @...: the parameters to insert into the format string
556  *
557  * Similar to the standard C sprintf() function but safer, since it
558  * calculates the maximum space required and allocates memory to hold
559  * the result. The returned string should be freed with g_free() when no
560  * longer needed.
561  *
562  * Returns: a newly-allocated string holding the result
563  */
564 gchar*
g_strdup_printf(const gchar * format,...)565 g_strdup_printf (const gchar *format,
566                  ...)
567 {
568   gchar *buffer;
569   va_list args;
570 
571   va_start (args, format);
572   buffer = g_strdup_vprintf (format, args);
573   va_end (args);
574 
575   return buffer;
576 }
577 
578 /**
579  * g_strconcat:
580  * @string1: the first string to add, which must not be %NULL
581  * @...: a %NULL-terminated list of strings to append to the string
582  *
583  * Concatenates all of the given strings into one long string. The
584  * returned string should be freed with g_free() when no longer needed.
585  *
586  * The variable argument list must end with %NULL. If you forget the %NULL,
587  * g_strconcat() will start appending random memory junk to your string.
588  *
589  * Note that this function is usually not the right function to use to
590  * assemble a translated message from pieces, since proper translation
591  * often requires the pieces to be reordered.
592  *
593  * Returns: a newly-allocated string containing all the string arguments
594  */
595 gchar*
g_strconcat(const gchar * string1,...)596 g_strconcat (const gchar *string1, ...)
597 {
598   gsize   l;
599   va_list args;
600   gchar   *s;
601   gchar   *concat;
602   gchar   *ptr;
603 
604   if (!string1)
605     return NULL;
606 
607   l = 1 + strlen (string1);
608   va_start (args, string1);
609   s = va_arg (args, gchar*);
610   while (s)
611     {
612       l += strlen (s);
613       s = va_arg (args, gchar*);
614     }
615   va_end (args);
616 
617   concat = g_new (gchar, l);
618   ptr = concat;
619 
620   ptr = g_stpcpy (ptr, string1);
621   va_start (args, string1);
622   s = va_arg (args, gchar*);
623   while (s)
624     {
625       ptr = g_stpcpy (ptr, s);
626       s = va_arg (args, gchar*);
627     }
628   va_end (args);
629 
630   return concat;
631 }
632 
633 /**
634  * g_strtod:
635  * @nptr:    the string to convert to a numeric value.
636  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
637  *           character after the last character used in the conversion.
638  *
639  * Converts a string to a #gdouble value.
640  * It calls the standard strtod() function to handle the conversion, but
641  * if the string is not completely converted it attempts the conversion
642  * again with g_ascii_strtod(), and returns the best match.
643  *
644  * This function should seldom be used. The normal situation when reading
645  * numbers not for human consumption is to use g_ascii_strtod(). Only when
646  * you know that you must expect both locale formatted and C formatted numbers
647  * should you use this. Make sure that you don't pass strings such as comma
648  * separated lists of values, since the commas may be interpreted as a decimal
649  * point in some locales, causing unexpected results.
650  *
651  * Returns: the #gdouble value.
652  **/
653 gdouble
g_strtod(const gchar * nptr,gchar ** endptr)654 g_strtod (const gchar *nptr,
655           gchar      **endptr)
656 {
657   gchar *fail_pos_1;
658   gchar *fail_pos_2;
659   gdouble val_1;
660   gdouble val_2 = 0;
661 
662   g_return_val_if_fail (nptr != NULL, 0);
663 
664   fail_pos_1 = NULL;
665   fail_pos_2 = NULL;
666 
667   val_1 = strtod (nptr, &fail_pos_1);
668 
669   if (fail_pos_1 && fail_pos_1[0] != 0)
670     val_2 = g_ascii_strtod (nptr, &fail_pos_2);
671 
672   if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
673     {
674       if (endptr)
675         *endptr = fail_pos_1;
676       return val_1;
677     }
678   else
679     {
680       if (endptr)
681         *endptr = fail_pos_2;
682       return val_2;
683     }
684 }
685 
686 /**
687  * g_ascii_strtod:
688  * @nptr:    the string to convert to a numeric value.
689  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
690  *           character after the last character used in the conversion.
691  *
692  * Converts a string to a #gdouble value.
693  *
694  * This function behaves like the standard strtod() function
695  * does in the C locale. It does this without actually changing
696  * the current locale, since that would not be thread-safe.
697  * A limitation of the implementation is that this function
698  * will still accept localized versions of infinities and NANs.
699  *
700  * This function is typically used when reading configuration
701  * files or other non-user input that should be locale independent.
702  * To handle input from the user you should normally use the
703  * locale-sensitive system strtod() function.
704  *
705  * To convert from a #gdouble to a string in a locale-insensitive
706  * way, use g_ascii_dtostr().
707  *
708  * If the correct value would cause overflow, plus or minus %HUGE_VAL
709  * is returned (according to the sign of the value), and %ERANGE is
710  * stored in %errno. If the correct value would cause underflow,
711  * zero is returned and %ERANGE is stored in %errno.
712  *
713  * This function resets %errno before calling strtod() so that
714  * you can reliably detect overflow and underflow.
715  *
716  * Returns: the #gdouble value.
717  */
718 gdouble
g_ascii_strtod(const gchar * nptr,gchar ** endptr)719 g_ascii_strtod (const gchar *nptr,
720                 gchar      **endptr)
721 {
722 #ifdef USE_XLOCALE
723 
724   g_return_val_if_fail (nptr != NULL, 0);
725 
726   errno = 0;
727 
728   return strtod_l (nptr, endptr, get_C_locale ());
729 
730 #else
731 
732   gchar *fail_pos;
733   gdouble val;
734 #ifndef __BIONIC__
735   struct lconv *locale_data;
736 #endif
737   const char *decimal_point;
738   gsize decimal_point_len;
739   const char *p, *decimal_point_pos;
740   const char *end = NULL; /* Silence gcc */
741   int strtod_errno;
742 
743   g_return_val_if_fail (nptr != NULL, 0);
744 
745   fail_pos = NULL;
746 
747 #ifndef __BIONIC__
748   locale_data = localeconv ();
749   decimal_point = locale_data->decimal_point;
750   decimal_point_len = strlen (decimal_point);
751 #else
752   decimal_point = ".";
753   decimal_point_len = 1;
754 #endif
755 
756   g_assert (decimal_point_len != 0);
757 
758   decimal_point_pos = NULL;
759   end = NULL;
760 
761   if (decimal_point[0] != '.' ||
762       decimal_point[1] != 0)
763     {
764       p = nptr;
765       /* Skip leading space */
766       while (g_ascii_isspace (*p))
767         p++;
768 
769       /* Skip leading optional sign */
770       if (*p == '+' || *p == '-')
771         p++;
772 
773       if (p[0] == '0' &&
774           (p[1] == 'x' || p[1] == 'X'))
775         {
776           p += 2;
777           /* HEX - find the (optional) decimal point */
778 
779           while (g_ascii_isxdigit (*p))
780             p++;
781 
782           if (*p == '.')
783             decimal_point_pos = p++;
784 
785           while (g_ascii_isxdigit (*p))
786             p++;
787 
788           if (*p == 'p' || *p == 'P')
789             p++;
790           if (*p == '+' || *p == '-')
791             p++;
792           while (g_ascii_isdigit (*p))
793             p++;
794 
795           end = p;
796         }
797       else if (g_ascii_isdigit (*p) || *p == '.')
798         {
799           while (g_ascii_isdigit (*p))
800             p++;
801 
802           if (*p == '.')
803             decimal_point_pos = p++;
804 
805           while (g_ascii_isdigit (*p))
806             p++;
807 
808           if (*p == 'e' || *p == 'E')
809             p++;
810           if (*p == '+' || *p == '-')
811             p++;
812           while (g_ascii_isdigit (*p))
813             p++;
814 
815           end = p;
816         }
817       /* For the other cases, we need not convert the decimal point */
818     }
819 
820   if (decimal_point_pos)
821     {
822       char *copy, *c;
823 
824       /* We need to convert the '.' to the locale specific decimal point */
825       copy = g_malloc (end - nptr + 1 + decimal_point_len);
826 
827       c = copy;
828       memcpy (c, nptr, decimal_point_pos - nptr);
829       c += decimal_point_pos - nptr;
830       memcpy (c, decimal_point, decimal_point_len);
831       c += decimal_point_len;
832       memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
833       c += end - (decimal_point_pos + 1);
834       *c = 0;
835 
836       errno = 0;
837       val = strtod (copy, &fail_pos);
838       strtod_errno = errno;
839 
840       if (fail_pos)
841         {
842           if (fail_pos - copy > decimal_point_pos - nptr)
843             fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
844           else
845             fail_pos = (char *)nptr + (fail_pos - copy);
846         }
847 
848       g_free (copy);
849 
850     }
851   else if (end)
852     {
853       char *copy;
854 
855       copy = g_malloc (end - (char *)nptr + 1);
856       memcpy (copy, nptr, end - nptr);
857       *(copy + (end - (char *)nptr)) = 0;
858 
859       errno = 0;
860       val = strtod (copy, &fail_pos);
861       strtod_errno = errno;
862 
863       if (fail_pos)
864         {
865           fail_pos = (char *)nptr + (fail_pos - copy);
866         }
867 
868       g_free (copy);
869     }
870   else
871     {
872       errno = 0;
873       val = strtod (nptr, &fail_pos);
874       strtod_errno = errno;
875     }
876 
877   if (endptr)
878     *endptr = fail_pos;
879 
880   errno = strtod_errno;
881 
882   return val;
883 #endif
884 }
885 
886 
887 /**
888  * g_ascii_dtostr:
889  * @buffer: A buffer to place the resulting string in
890  * @buf_len: The length of the buffer.
891  * @d: The #gdouble to convert
892  *
893  * Converts a #gdouble to a string, using the '.' as
894  * decimal point.
895  *
896  * This function generates enough precision that converting
897  * the string back using g_ascii_strtod() gives the same machine-number
898  * (on machines with IEEE compatible 64bit doubles). It is
899  * guaranteed that the size of the resulting string will never
900  * be larger than @G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
901  * nul character, which is always added.
902  *
903  * Returns: The pointer to the buffer with the converted string.
904  **/
905 gchar *
g_ascii_dtostr(gchar * buffer,gint buf_len,gdouble d)906 g_ascii_dtostr (gchar       *buffer,
907                 gint         buf_len,
908                 gdouble      d)
909 {
910   return g_ascii_formatd (buffer, buf_len, "%.17g", d);
911 }
912 
913 #pragma GCC diagnostic push
914 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
915 
916 /**
917  * g_ascii_formatd:
918  * @buffer: A buffer to place the resulting string in
919  * @buf_len: The length of the buffer.
920  * @format: The printf()-style format to use for the
921  *          code to use for converting.
922  * @d: The #gdouble to convert
923  *
924  * Converts a #gdouble to a string, using the '.' as
925  * decimal point. To format the number you pass in
926  * a printf()-style format string. Allowed conversion
927  * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
928  *
929  * The returned buffer is guaranteed to be nul-terminated.
930  *
931  * If you just want to want to serialize the value into a
932  * string, use g_ascii_dtostr().
933  *
934  * Returns: The pointer to the buffer with the converted string.
935  */
936 gchar *
g_ascii_formatd(gchar * buffer,gint buf_len,const gchar * format,gdouble d)937 g_ascii_formatd (gchar       *buffer,
938                  gint         buf_len,
939                  const gchar *format,
940                  gdouble      d)
941 {
942 #ifdef USE_XLOCALE
943   locale_t old_locale;
944 
945   old_locale = uselocale (get_C_locale ());
946    _g_snprintf (buffer, buf_len, format, d);
947   uselocale (old_locale);
948 
949   return buffer;
950 #else
951 #ifndef __BIONIC__
952   struct lconv *locale_data;
953 #endif
954   const char *decimal_point;
955   gsize decimal_point_len;
956   gchar *p;
957   int rest_len;
958   gchar format_char;
959 
960   g_return_val_if_fail (buffer != NULL, NULL);
961   g_return_val_if_fail (format[0] == '%', NULL);
962   g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
963 
964   format_char = format[strlen (format) - 1];
965 
966   g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
967                         format_char == 'f' || format_char == 'F' ||
968                         format_char == 'g' || format_char == 'G',
969                         NULL);
970 
971   if (format[0] != '%')
972     return NULL;
973 
974   if (strpbrk (format + 1, "'l%"))
975     return NULL;
976 
977   if (!(format_char == 'e' || format_char == 'E' ||
978         format_char == 'f' || format_char == 'F' ||
979         format_char == 'g' || format_char == 'G'))
980     return NULL;
981 
982   _g_snprintf (buffer, buf_len, format, d);
983 
984 #ifndef __BIONIC__
985   locale_data = localeconv ();
986   decimal_point = locale_data->decimal_point;
987   decimal_point_len = strlen (decimal_point);
988 #else
989   decimal_point = ".";
990   decimal_point_len = 1;
991 #endif
992 
993   g_assert (decimal_point_len != 0);
994 
995   if (decimal_point[0] != '.' ||
996       decimal_point[1] != 0)
997     {
998       p = buffer;
999 
1000       while (g_ascii_isspace (*p))
1001         p++;
1002 
1003       if (*p == '+' || *p == '-')
1004         p++;
1005 
1006       while (isdigit ((guchar)*p))
1007         p++;
1008 
1009       if (strncmp (p, decimal_point, decimal_point_len) == 0)
1010         {
1011           *p = '.';
1012           p++;
1013           if (decimal_point_len > 1)
1014             {
1015               rest_len = strlen (p + (decimal_point_len - 1));
1016               memmove (p, p + (decimal_point_len - 1), rest_len);
1017               p[rest_len] = 0;
1018             }
1019         }
1020     }
1021 
1022   return buffer;
1023 #endif
1024 }
1025 #pragma GCC diagnostic pop
1026 
/* Locale-independent ASCII classification and case mapping helpers.
 * Every macro may evaluate its argument more than once, so arguments
 * must not have side effects.
 */
#define ISSPACE(c)              ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                                 (c) == '\v' || (c) == '\f' || (c) == '\r')
#define ISUPPER(c)              ('A' <= (c) && (c) <= 'Z')
#define ISLOWER(c)              ('a' <= (c) && (c) <= 'z')
#define ISALPHA(c)              (ISLOWER (c) || ISUPPER (c))
#define TOUPPER(c)              (ISLOWER (c) ? (c) - 'a' + 'A' : (c))
#define TOLOWER(c)              (ISUPPER (c) ? (c) - 'A' + 'a' : (c))
1034 
1035 #ifndef USE_XLOCALE
1036 
1037 static guint64
g_parse_long_long(const gchar * nptr,const gchar ** endptr,guint base,gboolean * negative)1038 g_parse_long_long (const gchar  *nptr,
1039                    const gchar **endptr,
1040                    guint         base,
1041                    gboolean     *negative)
1042 {
1043   /* this code is based on on the strtol(3) code from GNU libc released under
1044    * the GNU Lesser General Public License.
1045    *
1046    * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
1047    *        Free Software Foundation, Inc.
1048    */
1049   gboolean overflow;
1050   guint64 cutoff;
1051   guint64 cutlim;
1052   guint64 ui64;
1053   const gchar *s, *save;
1054   guchar c;
1055 
1056   g_return_val_if_fail (nptr != NULL, 0);
1057 
1058   *negative = FALSE;
1059   if (base == 1 || base > 36)
1060     {
1061       errno = EINVAL;
1062       if (endptr)
1063         *endptr = nptr;
1064       return 0;
1065     }
1066 
1067   save = s = nptr;
1068 
1069   /* Skip white space.  */
1070   while (ISSPACE (*s))
1071     ++s;
1072 
1073   if (G_UNLIKELY (!*s))
1074     goto noconv;
1075 
1076   /* Check for a sign.  */
1077   if (*s == '-')
1078     {
1079       *negative = TRUE;
1080       ++s;
1081     }
1082   else if (*s == '+')
1083     ++s;
1084 
1085   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
1086   if (*s == '0')
1087     {
1088       if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
1089         {
1090           s += 2;
1091           base = 16;
1092         }
1093       else if (base == 0)
1094         base = 8;
1095     }
1096   else if (base == 0)
1097     base = 10;
1098 
1099   /* Save the pointer so we can check later if anything happened.  */
1100   save = s;
1101   cutoff = G_MAXUINT64 / base;
1102   cutlim = G_MAXUINT64 % base;
1103 
1104   overflow = FALSE;
1105   ui64 = 0;
1106   c = *s;
1107   for (; c; c = *++s)
1108     {
1109       if (c >= '0' && c <= '9')
1110         c -= '0';
1111       else if (ISALPHA (c))
1112         c = TOUPPER (c) - 'A' + 10;
1113       else
1114         break;
1115       if (c >= base)
1116         break;
1117       /* Check for overflow.  */
1118       if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
1119         overflow = TRUE;
1120       else
1121         {
1122           ui64 *= base;
1123           ui64 += c;
1124         }
1125     }
1126 
1127   /* Check if anything actually happened.  */
1128   if (s == save)
1129     goto noconv;
1130 
1131   /* Store in ENDPTR the address of one character
1132      past the last character we converted.  */
1133   if (endptr)
1134     *endptr = s;
1135 
1136   if (G_UNLIKELY (overflow))
1137     {
1138       errno = ERANGE;
1139       return G_MAXUINT64;
1140     }
1141 
1142   return ui64;
1143 
1144  noconv:
1145   /* We must handle a special case here: the base is 0 or 16 and the
1146      first two characters are '0' and 'x', but the rest are no
1147      hexadecimal digits.  This is no error case.  We return 0 and
1148      ENDPTR points to the `x`.  */
1149   if (endptr)
1150     {
1151       if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
1152           && save[-2] == '0')
1153         *endptr = &save[-1];
1154       else
1155         /*  There was no number to convert.  */
1156         *endptr = nptr;
1157     }
1158   return 0;
1159 }
1160 #endif /* !USE_XLOCALE */
1161 
1162 /**
1163  * g_ascii_strtoull:
1164  * @nptr:    the string to convert to a numeric value.
1165  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1166  *           character after the last character used in the conversion.
1167  * @base:    to be used for the conversion, 2..36 or 0
1168  *
1169  * Converts a string to a #guint64 value.
1170  * This function behaves like the standard strtoull() function
1171  * does in the C locale. It does this without actually
1172  * changing the current locale, since that would not be
1173  * thread-safe.
1174  *
1175  * Note that input with a leading minus sign (`-`) is accepted, and will return
1176  * the negation of the parsed number, unless that would overflow a #guint64.
1177  * Critically, this means you cannot assume that a short fixed length input will
1178  * never result in a low return value, as the input could have a leading `-`.
1179  *
1180  * This function is typically used when reading configuration
1181  * files or other non-user input that should be locale independent.
1182  * To handle input from the user you should normally use the
1183  * locale-sensitive system strtoull() function.
1184  *
1185  * If the correct value would cause overflow, %G_MAXUINT64
1186  * is returned, and `ERANGE` is stored in `errno`.
1187  * If the base is outside the valid range, zero is returned, and
1188  * `EINVAL` is stored in `errno`.
1189  * If the string conversion fails, zero is returned, and @endptr returns
1190  * @nptr (if @endptr is non-%NULL).
1191  *
1192  * Returns: the #guint64 value or zero on error.
1193  *
1194  * Since: 2.2
1195  */
1196 guint64
g_ascii_strtoull(const gchar * nptr,gchar ** endptr,guint base)1197 g_ascii_strtoull (const gchar *nptr,
1198                   gchar      **endptr,
1199                   guint        base)
1200 {
1201 #ifdef USE_XLOCALE
1202   return strtoull_l (nptr, endptr, base, get_C_locale ());
1203 #else
1204   gboolean negative;
1205   guint64 result;
1206 
1207   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1208 
1209   /* Return the result of the appropriate sign.  */
1210   return negative ? -result : result;
1211 #endif
1212 }
1213 
1214 /**
1215  * g_ascii_strtoll:
1216  * @nptr:    the string to convert to a numeric value.
1217  * @endptr:  (out) (transfer none) (optional): if non-%NULL, it returns the
1218  *           character after the last character used in the conversion.
1219  * @base:    to be used for the conversion, 2..36 or 0
1220  *
1221  * Converts a string to a #gint64 value.
1222  * This function behaves like the standard strtoll() function
1223  * does in the C locale. It does this without actually
1224  * changing the current locale, since that would not be
1225  * thread-safe.
1226  *
1227  * This function is typically used when reading configuration
1228  * files or other non-user input that should be locale independent.
1229  * To handle input from the user you should normally use the
1230  * locale-sensitive system strtoll() function.
1231  *
1232  * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1233  * is returned, and `ERANGE` is stored in `errno`.
1234  * If the base is outside the valid range, zero is returned, and
1235  * `EINVAL` is stored in `errno`. If the
1236  * string conversion fails, zero is returned, and @endptr returns @nptr
1237  * (if @endptr is non-%NULL).
1238  *
1239  * Returns: the #gint64 value or zero on error.
1240  *
1241  * Since: 2.12
1242  */
1243 gint64
g_ascii_strtoll(const gchar * nptr,gchar ** endptr,guint base)1244 g_ascii_strtoll (const gchar *nptr,
1245                  gchar      **endptr,
1246                  guint        base)
1247 {
1248 #ifdef USE_XLOCALE
1249   return strtoll_l (nptr, endptr, base, get_C_locale ());
1250 #else
1251   gboolean negative;
1252   guint64 result;
1253 
1254   result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1255 
1256   if (negative && result > (guint64) G_MININT64)
1257     {
1258       errno = ERANGE;
1259       return G_MININT64;
1260     }
1261   else if (!negative && result > (guint64) G_MAXINT64)
1262     {
1263       errno = ERANGE;
1264       return G_MAXINT64;
1265     }
1266   else if (negative)
1267     return - (gint64) result;
1268   else
1269     return (gint64) result;
1270 #endif
1271 }
1272 
1273 /**
1274  * g_strerror:
1275  * @errnum: the system error number. See the standard C %errno
1276  *     documentation
1277  *
1278  * Returns a string corresponding to the given error code, e.g. "no
1279  * such process". Unlike strerror(), this always returns a string in
1280  * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1281  * the lifetime of the process.
1282  *
1283  * Note that the string may be translated according to the current locale.
1284  *
1285  * The value of %errno will not be changed by this function. However, it may
1286  * be changed by intermediate function calls, so you should save its value
1287  * as soon as the call returns:
1288  * |[
1289  *   int saved_errno;
1290  *
1291  *   ret = read (blah);
1292  *   saved_errno = errno;
1293  *
1294  *   g_strerror (saved_errno);
1295  * ]|
1296  *
1297  * Returns: a UTF-8 string describing the error code. If the error code
1298  *     is unknown, it returns a string like "unknown error (<code>)".
1299  */
1300 const gchar *
g_strerror(gint errnum)1301 g_strerror (gint errnum)
1302 {
1303   static GHashTable *errors;
1304   G_LOCK_DEFINE_STATIC (errors);
1305   const gchar *msg;
1306   gint saved_errno = errno;
1307 
1308   G_LOCK (errors);
1309   if (errors)
1310     msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1311   else
1312     {
1313       errors = g_hash_table_new (NULL, NULL);
1314       msg = NULL;
1315     }
1316 
1317   if (!msg)
1318     {
1319       gchar buf[1024];
1320       GError *error = NULL;
1321 
1322 #if defined(G_OS_WIN32)
1323       strerror_s (buf, sizeof (buf), errnum);
1324       msg = buf;
1325 #elif defined(HAVE_STRERROR_R)
1326       /* Match the condition in strerror_r(3) for glibc */
1327 #  if defined(STRERROR_R_CHAR_P)
1328       msg = strerror_r (errnum, buf, sizeof (buf));
1329 #  else
1330       (void) strerror_r (errnum, buf, sizeof (buf));
1331       msg = buf;
1332 #  endif /* HAVE_STRERROR_R */
1333 #else
1334       g_strlcpy (buf, strerror (errnum), sizeof (buf));
1335       msg = buf;
1336 #endif
1337       if (!g_get_console_charset (NULL))
1338         {
1339           msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1340           if (error)
1341             g_print ("%s\n", error->message);
1342         }
1343       else if (msg == (const gchar *)buf)
1344         msg = g_strdup (buf);
1345 
1346       g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1347     }
1348   G_UNLOCK (errors);
1349 
1350   errno = saved_errno;
1351   return msg;
1352 }
1353 
1354 /**
1355  * g_strsignal:
1356  * @signum: the signal number. See the `signal` documentation
1357  *
1358  * Returns a string describing the given signal, e.g. "Segmentation fault".
1359  * You should use this function in preference to strsignal(), because it
1360  * returns a string in UTF-8 encoding, and since not all platforms support
1361  * the strsignal() function.
1362  *
1363  * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1364  *     it returns "unknown signal (<signum>)".
1365  */
1366 const gchar *
g_strsignal(gint signum)1367 g_strsignal (gint signum)
1368 {
1369   gchar *msg;
1370   gchar *tofree;
1371   const gchar *ret;
1372 
1373   msg = tofree = NULL;
1374 
1375 #ifdef HAVE_STRSIGNAL
1376   msg = strsignal (signum);
1377   if (!g_get_console_charset (NULL))
1378     msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1379 #endif
1380 
1381   if (!msg)
1382     msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1383   ret = g_intern_string (msg);
1384   g_free (tofree);
1385 
1386   return ret;
1387 }
1388 
1389 /* Functions g_strlcpy and g_strlcat were originally developed by
1390  * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1391  * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1392  * for more information.
1393  */
1394 
1395 #ifdef HAVE_STRLCPY
1396 /* Use the native ones, if available; they might be implemented in assembly */
1397 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1398 g_strlcpy (gchar       *dest,
1399            const gchar *src,
1400            gsize        dest_size)
1401 {
1402   g_return_val_if_fail (dest != NULL, 0);
1403   g_return_val_if_fail (src  != NULL, 0);
1404 
1405   return strlcpy (dest, src, dest_size);
1406 }
1407 
1408 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1409 g_strlcat (gchar       *dest,
1410            const gchar *src,
1411            gsize        dest_size)
1412 {
1413   g_return_val_if_fail (dest != NULL, 0);
1414   g_return_val_if_fail (src  != NULL, 0);
1415 
1416   return strlcat (dest, src, dest_size);
1417 }
1418 
1419 #else /* ! HAVE_STRLCPY */
1420 /**
1421  * g_strlcpy:
1422  * @dest: destination buffer
1423  * @src: source buffer
1424  * @dest_size: length of @dest in bytes
1425  *
1426  * Portability wrapper that calls strlcpy() on systems which have it,
1427  * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1428  * guaranteed to be nul-terminated; @src must be nul-terminated;
1429  * @dest_size is the buffer size, not the number of bytes to copy.
1430  *
1431  * At most @dest_size - 1 characters will be copied. Always nul-terminates
1432  * (unless @dest_size is 0). This function does not allocate memory. Unlike
1433  * strncpy(), this function doesn't pad @dest (so it's often faster). It
1434  * returns the size of the attempted result, strlen (src), so if
1435  * @retval >= @dest_size, truncation occurred.
1436  *
1437  * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1438  * but if you really want to avoid screwups, g_strdup() is an even better
1439  * idea.
1440  *
1441  * Returns: length of @src
1442  */
1443 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1444 g_strlcpy (gchar       *dest,
1445            const gchar *src,
1446            gsize        dest_size)
1447 {
1448   gchar *d = dest;
1449   const gchar *s = src;
1450   gsize n = dest_size;
1451 
1452   g_return_val_if_fail (dest != NULL, 0);
1453   g_return_val_if_fail (src  != NULL, 0);
1454 
1455   /* Copy as many bytes as will fit */
1456   if (n != 0 && --n != 0)
1457     do
1458       {
1459         gchar c = *s++;
1460 
1461         *d++ = c;
1462         if (c == 0)
1463           break;
1464       }
1465     while (--n != 0);
1466 
1467   /* If not enough room in dest, add NUL and traverse rest of src */
1468   if (n == 0)
1469     {
1470       if (dest_size != 0)
1471         *d = 0;
1472       while (*s++)
1473         ;
1474     }
1475 
1476   return s - src - 1;  /* count does not include NUL */
1477 }
1478 
1479 /**
1480  * g_strlcat:
1481  * @dest: destination buffer, already containing one nul-terminated string
1482  * @src: source buffer
1483  * @dest_size: length of @dest buffer in bytes (not length of existing string
1484  *     inside @dest)
1485  *
1486  * Portability wrapper that calls strlcat() on systems which have it,
1487  * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1488  * guaranteeing nul-termination for @dest. The total size of @dest won't
1489  * exceed @dest_size.
1490  *
1491  * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1492  * @dest_size is the full size of dest, not the space left over. This
1493  * function does not allocate memory. It always nul-terminates (unless
1494  * @dest_size == 0 or there were no nul characters in the @dest_size
1495  * characters of dest to start with).
1496  *
1497  * Caveat: this is supposedly a more secure alternative to strcat() or
1498  * strncat(), but for real security g_strconcat() is harder to mess up.
1499  *
1500  * Returns: size of attempted result, which is MIN (dest_size, strlen
1501  *     (original dest)) + strlen (src), so if retval >= dest_size,
1502  *     truncation occurred.
1503  */
1504 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1505 g_strlcat (gchar       *dest,
1506            const gchar *src,
1507            gsize        dest_size)
1508 {
1509   gchar *d = dest;
1510   const gchar *s = src;
1511   gsize bytes_left = dest_size;
1512   gsize dlength;  /* Logically, MIN (strlen (d), dest_size) */
1513 
1514   g_return_val_if_fail (dest != NULL, 0);
1515   g_return_val_if_fail (src  != NULL, 0);
1516 
1517   /* Find the end of dst and adjust bytes left but don't go past end */
1518   while (*d != 0 && bytes_left-- != 0)
1519     d++;
1520   dlength = d - dest;
1521   bytes_left = dest_size - dlength;
1522 
1523   if (bytes_left == 0)
1524     return dlength + strlen (s);
1525 
1526   while (*s != 0)
1527     {
1528       if (bytes_left != 1)
1529         {
1530           *d++ = *s;
1531           bytes_left--;
1532         }
1533       s++;
1534     }
1535   *d = 0;
1536 
1537   return dlength + (s - src);  /* count does not include NUL */
1538 }
1539 #endif /* ! HAVE_STRLCPY */
1540 
1541 /**
1542  * g_ascii_strdown:
1543  * @str: a string
1544  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1545  *
1546  * Converts all upper case ASCII letters to lower case ASCII letters.
1547  *
1548  * Returns: a newly-allocated string, with all the upper case
1549  *     characters in @str converted to lower case, with semantics that
1550  *     exactly match g_ascii_tolower(). (Note that this is unlike the
1551  *     old g_strdown(), which modified the string in place.)
1552  */
1553 gchar*
g_ascii_strdown(const gchar * str,gssize len)1554 g_ascii_strdown (const gchar *str,
1555                  gssize       len)
1556 {
1557   gchar *result, *s;
1558 
1559   g_return_val_if_fail (str != NULL, NULL);
1560 
1561   if (len < 0)
1562     len = (gssize) strlen (str);
1563 
1564   result = g_strndup (str, (gsize) len);
1565   for (s = result; *s; s++)
1566     *s = g_ascii_tolower (*s);
1567 
1568   return result;
1569 }
1570 
1571 /**
1572  * g_ascii_strup:
1573  * @str: a string
1574  * @len: length of @str in bytes, or -1 if @str is nul-terminated
1575  *
1576  * Converts all lower case ASCII letters to upper case ASCII letters.
1577  *
1578  * Returns: a newly allocated string, with all the lower case
1579  *     characters in @str converted to upper case, with semantics that
1580  *     exactly match g_ascii_toupper(). (Note that this is unlike the
1581  *     old g_strup(), which modified the string in place.)
1582  */
1583 gchar*
g_ascii_strup(const gchar * str,gssize len)1584 g_ascii_strup (const gchar *str,
1585                gssize       len)
1586 {
1587   gchar *result, *s;
1588 
1589   g_return_val_if_fail (str != NULL, NULL);
1590 
1591   if (len < 0)
1592     len = (gssize) strlen (str);
1593 
1594   result = g_strndup (str, (gsize) len);
1595   for (s = result; *s; s++)
1596     *s = g_ascii_toupper (*s);
1597 
1598   return result;
1599 }
1600 
1601 /**
1602  * g_str_is_ascii:
1603  * @str: a string
1604  *
1605  * Determines if a string is pure ASCII. A string is pure ASCII if it
1606  * contains no bytes with the high bit set.
1607  *
1608  * Returns: %TRUE if @str is ASCII
1609  *
1610  * Since: 2.40
1611  */
1612 gboolean
g_str_is_ascii(const gchar * str)1613 g_str_is_ascii (const gchar *str)
1614 {
1615   gint i;
1616 
1617   for (i = 0; str[i]; i++)
1618     if (str[i] & 0x80)
1619       return FALSE;
1620 
1621   return TRUE;
1622 }
1623 
1624 /**
1625  * g_strdown:
1626  * @string: the string to convert.
1627  *
1628  * Converts a string to lower case.
1629  *
1630  * Returns: the string
1631  *
1632  * Deprecated:2.2: This function is totally broken for the reasons discussed
1633  * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1634  * instead.
1635  **/
1636 gchar*
g_strdown(gchar * string)1637 g_strdown (gchar *string)
1638 {
1639   guchar *s;
1640 
1641   g_return_val_if_fail (string != NULL, NULL);
1642 
1643   s = (guchar *) string;
1644 
1645   while (*s)
1646     {
1647       if (isupper (*s))
1648         *s = tolower (*s);
1649       s++;
1650     }
1651 
1652   return (gchar *) string;
1653 }
1654 
1655 /**
1656  * g_strup:
1657  * @string: the string to convert
1658  *
1659  * Converts a string to upper case.
1660  *
1661  * Returns: the string
1662  *
1663  * Deprecated:2.2: This function is totally broken for the reasons
1664  *     discussed in the g_strncasecmp() docs - use g_ascii_strup()
1665  *     or g_utf8_strup() instead.
1666  */
1667 gchar*
g_strup(gchar * string)1668 g_strup (gchar *string)
1669 {
1670   guchar *s;
1671 
1672   g_return_val_if_fail (string != NULL, NULL);
1673 
1674   s = (guchar *) string;
1675 
1676   while (*s)
1677     {
1678       if (islower (*s))
1679         *s = toupper (*s);
1680       s++;
1681     }
1682 
1683   return (gchar *) string;
1684 }
1685 
1686 /**
1687  * g_strreverse:
1688  * @string: the string to reverse
1689  *
1690  * Reverses all of the bytes in a string. For example,
1691  * `g_strreverse ("abcdef")` will result in "fedcba".
1692  *
1693  * Note that g_strreverse() doesn't work on UTF-8 strings
1694  * containing multibyte characters. For that purpose, use
1695  * g_utf8_strreverse().
1696  *
1697  * Returns: the same pointer passed in as @string
1698  */
1699 gchar*
g_strreverse(gchar * string)1700 g_strreverse (gchar *string)
1701 {
1702   g_return_val_if_fail (string != NULL, NULL);
1703 
1704   if (*string)
1705     {
1706       gchar *h, *t;
1707 
1708       h = string;
1709       t = string + strlen (string) - 1;
1710 
1711       while (h < t)
1712         {
1713           gchar c;
1714 
1715           c = *h;
1716           *h = *t;
1717           h++;
1718           *t = c;
1719           t--;
1720         }
1721     }
1722 
1723   return string;
1724 }
1725 
1726 /**
1727  * g_ascii_tolower:
1728  * @c: any character
1729  *
1730  * Convert a character to ASCII lower case.
1731  *
1732  * Unlike the standard C library tolower() function, this only
1733  * recognizes standard ASCII letters and ignores the locale, returning
1734  * all non-ASCII characters unchanged, even if they are lower case
1735  * letters in a particular character set. Also unlike the standard
1736  * library function, this takes and returns a char, not an int, so
1737  * don't call it on %EOF but no need to worry about casting to #guchar
1738  * before passing a possibly non-ASCII character in.
1739  *
1740  * Returns: the result of converting @c to lower case. If @c is
1741  *     not an ASCII upper case letter, @c is returned unchanged.
1742  */
1743 gchar
g_ascii_tolower(gchar c)1744 g_ascii_tolower (gchar c)
1745 {
1746   return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1747 }
1748 
1749 /**
1750  * g_ascii_toupper:
1751  * @c: any character
1752  *
1753  * Convert a character to ASCII upper case.
1754  *
1755  * Unlike the standard C library toupper() function, this only
1756  * recognizes standard ASCII letters and ignores the locale, returning
1757  * all non-ASCII characters unchanged, even if they are upper case
1758  * letters in a particular character set. Also unlike the standard
1759  * library function, this takes and returns a char, not an int, so
1760  * don't call it on %EOF but no need to worry about casting to #guchar
1761  * before passing a possibly non-ASCII character in.
1762  *
1763  * Returns: the result of converting @c to upper case. If @c is not
1764  *    an ASCII lower case letter, @c is returned unchanged.
1765  */
1766 gchar
g_ascii_toupper(gchar c)1767 g_ascii_toupper (gchar c)
1768 {
1769   return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1770 }
1771 
1772 /**
1773  * g_ascii_digit_value:
1774  * @c: an ASCII character
1775  *
1776  * Determines the numeric value of a character as a decimal digit.
1777  * Differs from g_unichar_digit_value() because it takes a char, so
1778  * there's no worry about sign extension if characters are signed.
1779  *
1780  * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1781  *    its numeric value. Otherwise, -1.
1782  */
1783 int
g_ascii_digit_value(gchar c)1784 g_ascii_digit_value (gchar c)
1785 {
1786   if (g_ascii_isdigit (c))
1787     return c - '0';
1788   return -1;
1789 }
1790 
1791 /**
1792  * g_ascii_xdigit_value:
1793  * @c: an ASCII character.
1794  *
1795  * Determines the numeric value of a character as a hexidecimal
1796  * digit. Differs from g_unichar_xdigit_value() because it takes
1797  * a char, so there's no worry about sign extension if characters
1798  * are signed.
1799  *
1800  * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1801  *     its numeric value. Otherwise, -1.
1802  */
1803 int
g_ascii_xdigit_value(gchar c)1804 g_ascii_xdigit_value (gchar c)
1805 {
1806   if (c >= 'A' && c <= 'F')
1807     return c - 'A' + 10;
1808   if (c >= 'a' && c <= 'f')
1809     return c - 'a' + 10;
1810   return g_ascii_digit_value (c);
1811 }
1812 
1813 /**
1814  * g_ascii_strcasecmp:
1815  * @s1: string to compare with @s2
1816  * @s2: string to compare with @s1
1817  *
1818  * Compare two strings, ignoring the case of ASCII characters.
1819  *
1820  * Unlike the BSD strcasecmp() function, this only recognizes standard
1821  * ASCII letters and ignores the locale, treating all non-ASCII
1822  * bytes as if they are not letters.
1823  *
1824  * This function should be used only on strings that are known to be
1825  * in encodings where the bytes corresponding to ASCII letters always
1826  * represent themselves. This includes UTF-8 and the ISO-8859-*
1827  * charsets, but not for instance double-byte encodings like the
1828  * Windows Codepage 932, where the trailing bytes of double-byte
1829  * characters include all ASCII letters. If you compare two CP932
1830  * strings using this function, you will get false matches.
1831  *
1832  * Both @s1 and @s2 must be non-%NULL.
1833  *
1834  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1835  *     or a positive value if @s1 > @s2.
1836  */
1837 gint
g_ascii_strcasecmp(const gchar * s1,const gchar * s2)1838 g_ascii_strcasecmp (const gchar *s1,
1839                     const gchar *s2)
1840 {
1841   gint c1, c2;
1842 
1843   g_return_val_if_fail (s1 != NULL, 0);
1844   g_return_val_if_fail (s2 != NULL, 0);
1845 
1846   while (*s1 && *s2)
1847     {
1848       c1 = (gint)(guchar) TOLOWER (*s1);
1849       c2 = (gint)(guchar) TOLOWER (*s2);
1850       if (c1 != c2)
1851         return (c1 - c2);
1852       s1++; s2++;
1853     }
1854 
1855   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1856 }
1857 
1858 /**
1859  * g_ascii_strncasecmp:
1860  * @s1: string to compare with @s2
1861  * @s2: string to compare with @s1
1862  * @n: number of characters to compare
1863  *
1864  * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1865  * characters after the first @n in each string.
1866  *
1867  * Unlike the BSD strcasecmp() function, this only recognizes standard
1868  * ASCII letters and ignores the locale, treating all non-ASCII
1869  * characters as if they are not letters.
1870  *
1871  * The same warning as in g_ascii_strcasecmp() applies: Use this
1872  * function only on strings known to be in encodings where bytes
1873  * corresponding to ASCII letters always represent themselves.
1874  *
1875  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1876  *     or a positive value if @s1 > @s2.
1877  */
1878 gint
g_ascii_strncasecmp(const gchar * s1,const gchar * s2,gsize n)1879 g_ascii_strncasecmp (const gchar *s1,
1880                      const gchar *s2,
1881                      gsize        n)
1882 {
1883   gint c1, c2;
1884 
1885   g_return_val_if_fail (s1 != NULL, 0);
1886   g_return_val_if_fail (s2 != NULL, 0);
1887 
1888   while (n && *s1 && *s2)
1889     {
1890       n -= 1;
1891       c1 = (gint)(guchar) TOLOWER (*s1);
1892       c2 = (gint)(guchar) TOLOWER (*s2);
1893       if (c1 != c2)
1894         return (c1 - c2);
1895       s1++; s2++;
1896     }
1897 
1898   if (n)
1899     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1900   else
1901     return 0;
1902 }
1903 
1904 /**
1905  * g_strcasecmp:
1906  * @s1: a string
1907  * @s2: a string to compare with @s1
1908  *
1909  * A case-insensitive string comparison, corresponding to the standard
1910  * strcasecmp() function on platforms which support it.
1911  *
1912  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1913  *     or a positive value if @s1 > @s2.
1914  *
1915  * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1916  *     function is deprecated and how to replace it.
1917  */
1918 gint
g_strcasecmp(const gchar * s1,const gchar * s2)1919 g_strcasecmp (const gchar *s1,
1920               const gchar *s2)
1921 {
1922 #ifdef HAVE_STRCASECMP
1923   g_return_val_if_fail (s1 != NULL, 0);
1924   g_return_val_if_fail (s2 != NULL, 0);
1925 
1926   return strcasecmp (s1, s2);
1927 #else
1928   gint c1, c2;
1929 
1930   g_return_val_if_fail (s1 != NULL, 0);
1931   g_return_val_if_fail (s2 != NULL, 0);
1932 
1933   while (*s1 && *s2)
1934     {
1935       /* According to A. Cox, some platforms have islower's that
1936        * don't work right on non-uppercase
1937        */
1938       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1939       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1940       if (c1 != c2)
1941         return (c1 - c2);
1942       s1++; s2++;
1943     }
1944 
1945   return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1946 #endif
1947 }
1948 
1949 /**
1950  * g_strncasecmp:
1951  * @s1: a string
1952  * @s2: a string to compare with @s1
1953  * @n: the maximum number of characters to compare
1954  *
1955  * A case-insensitive string comparison, corresponding to the standard
1956  * strncasecmp() function on platforms which support it. It is similar
1957  * to g_strcasecmp() except it only compares the first @n characters of
1958  * the strings.
1959  *
1960  * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1961  *     or a positive value if @s1 > @s2.
1962  *
1963  * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1964  *     the comparison by calling toupper()/tolower(). These functions
1965  *     are locale-specific and operate on single bytes. However, it is
1966  *     impossible to handle things correctly from an internationalization
1967  *     standpoint by operating on bytes, since characters may be multibyte.
1968  *     Thus g_strncasecmp() is broken if your string is guaranteed to be
1969  *     ASCII, since it is locale-sensitive, and it's broken if your string
1970  *     is localized, since it doesn't work on many encodings at all,
1971  *     including UTF-8, EUC-JP, etc.
1972  *
1973  *     There are therefore two replacement techniques: g_ascii_strncasecmp(),
1974  *     which only works on ASCII and is not locale-sensitive, and
1975  *     g_utf8_casefold() followed by strcmp() on the resulting strings,
1976  *     which is good for case-insensitive sorting of UTF-8.
1977  */
1978 gint
g_strncasecmp(const gchar * s1,const gchar * s2,guint n)1979 g_strncasecmp (const gchar *s1,
1980                const gchar *s2,
1981                guint n)
1982 {
1983 #ifdef HAVE_STRNCASECMP
1984   return strncasecmp (s1, s2, n);
1985 #else
1986   gint c1, c2;
1987 
1988   g_return_val_if_fail (s1 != NULL, 0);
1989   g_return_val_if_fail (s2 != NULL, 0);
1990 
1991   while (n && *s1 && *s2)
1992     {
1993       n -= 1;
1994       /* According to A. Cox, some platforms have islower's that
1995        * don't work right on non-uppercase
1996        */
1997       c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1998       c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1999       if (c1 != c2)
2000         return (c1 - c2);
2001       s1++; s2++;
2002     }
2003 
2004   if (n)
2005     return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
2006   else
2007     return 0;
2008 #endif
2009 }
2010 
2011 /**
2012  * g_strdelimit:
2013  * @string: the string to convert
2014  * @delimiters: (nullable): a string containing the current delimiters,
2015  *     or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
2016  * @new_delimiter: the new delimiter character
2017  *
2018  * Converts any delimiter characters in @string to @new_delimiter.
2019  * Any characters in @string which are found in @delimiters are
2020  * changed to the @new_delimiter character. Modifies @string in place,
2021  * and returns @string itself, not a copy. The return value is to
2022  * allow nesting such as
2023  * |[<!-- language="C" -->
2024  *   g_ascii_strup (g_strdelimit (str, "abc", '?'))
2025  * ]|
2026  *
2027  * In order to modify a copy, you may use `g_strdup()`:
2028  * |[<!-- language="C" -->
2029  *   reformatted = g_strdelimit (g_strdup (const_str), "abc", '?');
2030  *   ...
2031  *   g_free (reformatted);
2032  * ]|
2033  *
2034  * Returns: @string
2035  */
2036 gchar *
g_strdelimit(gchar * string,const gchar * delimiters,gchar new_delim)2037 g_strdelimit (gchar       *string,
2038               const gchar *delimiters,
2039               gchar        new_delim)
2040 {
2041   gchar *c;
2042 
2043   g_return_val_if_fail (string != NULL, NULL);
2044 
2045   if (!delimiters)
2046     delimiters = G_STR_DELIMITERS;
2047 
2048   for (c = string; *c; c++)
2049     {
2050       if (strchr (delimiters, *c))
2051         *c = new_delim;
2052     }
2053 
2054   return string;
2055 }
2056 
2057 /**
2058  * g_strcanon:
2059  * @string: a nul-terminated array of bytes
2060  * @valid_chars: bytes permitted in @string
2061  * @substitutor: replacement character for disallowed bytes
2062  *
2063  * For each character in @string, if the character is not in @valid_chars,
2064  * replaces the character with @substitutor. Modifies @string in place,
2065  * and return @string itself, not a copy. The return value is to allow
2066  * nesting such as
2067  * |[<!-- language="C" -->
2068  *   g_ascii_strup (g_strcanon (str, "abc", '?'))
2069  * ]|
2070  *
2071  * In order to modify a copy, you may use `g_strdup()`:
2072  * |[<!-- language="C" -->
2073  *   reformatted = g_strcanon (g_strdup (const_str), "abc", '?');
2074  *   ...
2075  *   g_free (reformatted);
2076  * ]|
2077  *
2078  * Returns: @string
2079  */
2080 gchar *
g_strcanon(gchar * string,const gchar * valid_chars,gchar substitutor)2081 g_strcanon (gchar       *string,
2082             const gchar *valid_chars,
2083             gchar        substitutor)
2084 {
2085   gchar *c;
2086 
2087   g_return_val_if_fail (string != NULL, NULL);
2088   g_return_val_if_fail (valid_chars != NULL, NULL);
2089 
2090   for (c = string; *c; c++)
2091     {
2092       if (!strchr (valid_chars, *c))
2093         *c = substitutor;
2094     }
2095 
2096   return string;
2097 }
2098 
2099 /**
2100  * g_strcompress:
2101  * @source: a string to compress
2102  *
2103  * Replaces all escaped characters with their one byte equivalent.
2104  *
2105  * This function does the reverse conversion of g_strescape().
2106  *
2107  * Returns: a newly-allocated copy of @source with all escaped
2108  *     character compressed
2109  */
2110 gchar *
g_strcompress(const gchar * source)2111 g_strcompress (const gchar *source)
2112 {
2113   const gchar *p = source, *octal;
2114   gchar *dest;
2115   gchar *q;
2116 
2117   g_return_val_if_fail (source != NULL, NULL);
2118 
2119   dest = g_malloc (strlen (source) + 1);
2120   q = dest;
2121 
2122   while (*p)
2123     {
2124       if (*p == '\\')
2125         {
2126           p++;
2127           switch (*p)
2128             {
2129             case '\0':
2130               g_warning ("g_strcompress: trailing \\");
2131               goto out;
2132             case '0':  case '1':  case '2':  case '3':  case '4':
2133             case '5':  case '6':  case '7':
2134               *q = 0;
2135               octal = p;
2136               while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
2137                 {
2138                   *q = (*q * 8) + (*p - '0');
2139                   p++;
2140                 }
2141               q++;
2142               p--;
2143               break;
2144             case 'b':
2145               *q++ = '\b';
2146               break;
2147             case 'f':
2148               *q++ = '\f';
2149               break;
2150             case 'n':
2151               *q++ = '\n';
2152               break;
2153             case 'r':
2154               *q++ = '\r';
2155               break;
2156             case 't':
2157               *q++ = '\t';
2158               break;
2159             case 'v':
2160               *q++ = '\v';
2161               break;
2162             default:            /* Also handles \" and \\ */
2163               *q++ = *p;
2164               break;
2165             }
2166         }
2167       else
2168         *q++ = *p;
2169       p++;
2170     }
2171 out:
2172   *q = 0;
2173 
2174   return dest;
2175 }
2176 
2177 /**
2178  * g_strescape:
2179  * @source: a string to escape
2180  * @exceptions: (nullable): a string of characters not to escape in @source
2181  *
2182  * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2183  * and '"' in the string @source by inserting a '\' before
2184  * them. Additionally all characters in the range 0x01-0x1F (everything
2185  * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2186  * replaced with a '\' followed by their octal representation.
2187  * Characters supplied in @exceptions are not escaped.
2188  *
2189  * g_strcompress() does the reverse conversion.
2190  *
2191  * Returns: a newly-allocated copy of @source with certain
2192  *     characters escaped. See above.
2193  */
2194 gchar *
g_strescape(const gchar * source,const gchar * exceptions)2195 g_strescape (const gchar *source,
2196              const gchar *exceptions)
2197 {
2198   const guchar *p;
2199   gchar *dest;
2200   gchar *q;
2201   guchar excmap[256];
2202 
2203   g_return_val_if_fail (source != NULL, NULL);
2204 
2205   p = (guchar *) source;
2206   /* Each source byte needs maximally four destination chars (\777) */
2207   q = dest = g_malloc (strlen (source) * 4 + 1);
2208 
2209   memset (excmap, 0, 256);
2210   if (exceptions)
2211     {
2212       guchar *e = (guchar *) exceptions;
2213 
2214       while (*e)
2215         {
2216           excmap[*e] = 1;
2217           e++;
2218         }
2219     }
2220 
2221   while (*p)
2222     {
2223       if (excmap[*p])
2224         *q++ = *p;
2225       else
2226         {
2227           switch (*p)
2228             {
2229             case '\b':
2230               *q++ = '\\';
2231               *q++ = 'b';
2232               break;
2233             case '\f':
2234               *q++ = '\\';
2235               *q++ = 'f';
2236               break;
2237             case '\n':
2238               *q++ = '\\';
2239               *q++ = 'n';
2240               break;
2241             case '\r':
2242               *q++ = '\\';
2243               *q++ = 'r';
2244               break;
2245             case '\t':
2246               *q++ = '\\';
2247               *q++ = 't';
2248               break;
2249             case '\v':
2250               *q++ = '\\';
2251               *q++ = 'v';
2252               break;
2253             case '\\':
2254               *q++ = '\\';
2255               *q++ = '\\';
2256               break;
2257             case '"':
2258               *q++ = '\\';
2259               *q++ = '"';
2260               break;
2261             default:
2262               if ((*p < ' ') || (*p >= 0177))
2263                 {
2264                   *q++ = '\\';
2265                   *q++ = '0' + (((*p) >> 6) & 07);
2266                   *q++ = '0' + (((*p) >> 3) & 07);
2267                   *q++ = '0' + ((*p) & 07);
2268                 }
2269               else
2270                 *q++ = *p;
2271               break;
2272             }
2273         }
2274       p++;
2275     }
2276   *q = 0;
2277   return dest;
2278 }
2279 
2280 /**
2281  * g_strchug:
2282  * @string: a string to remove the leading whitespace from
2283  *
2284  * Removes leading whitespace from a string, by moving the rest
2285  * of the characters forward.
2286  *
2287  * This function doesn't allocate or reallocate any memory;
2288  * it modifies @string in place. Therefore, it cannot be used on
2289  * statically allocated strings.
2290  *
2291  * The pointer to @string is returned to allow the nesting of functions.
2292  *
2293  * Also see g_strchomp() and g_strstrip().
2294  *
2295  * Returns: @string
2296  */
2297 gchar *
g_strchug(gchar * string)2298 g_strchug (gchar *string)
2299 {
2300   guchar *start;
2301 
2302   g_return_val_if_fail (string != NULL, NULL);
2303 
2304   for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2305     ;
2306 
2307   memmove (string, start, strlen ((gchar *) start) + 1);
2308 
2309   return string;
2310 }
2311 
2312 /**
2313  * g_strchomp:
2314  * @string: a string to remove the trailing whitespace from
2315  *
2316  * Removes trailing whitespace from a string.
2317  *
2318  * This function doesn't allocate or reallocate any memory;
2319  * it modifies @string in place. Therefore, it cannot be used
2320  * on statically allocated strings.
2321  *
2322  * The pointer to @string is returned to allow the nesting of functions.
2323  *
2324  * Also see g_strchug() and g_strstrip().
2325  *
2326  * Returns: @string
2327  */
2328 gchar *
g_strchomp(gchar * string)2329 g_strchomp (gchar *string)
2330 {
2331   gsize len;
2332 
2333   g_return_val_if_fail (string != NULL, NULL);
2334 
2335   len = strlen (string);
2336   while (len--)
2337     {
2338       if (g_ascii_isspace ((guchar) string[len]))
2339         string[len] = '\0';
2340       else
2341         break;
2342     }
2343 
2344   return string;
2345 }
2346 
2347 /**
2348  * g_strsplit:
2349  * @string: a string to split
2350  * @delimiter: a string which specifies the places at which to split
2351  *     the string. The delimiter is not included in any of the resulting
2352  *     strings, unless @max_tokens is reached.
2353  * @max_tokens: the maximum number of pieces to split @string into.
2354  *     If this is less than 1, the string is split completely.
2355  *
2356  * Splits a string into a maximum of @max_tokens pieces, using the given
2357  * @delimiter. If @max_tokens is reached, the remainder of @string is
2358  * appended to the last token.
2359  *
2360  * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2361  * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2362  * and "".
2363  *
2364  * As a special case, the result of splitting the empty string "" is an empty
2365  * vector, not a vector containing a single string. The reason for this
2366  * special case is that being able to represent an empty vector is typically
2367  * more useful than consistent handling of empty elements. If you do need
2368  * to represent empty elements, you'll need to check for the empty string
2369  * before calling g_strsplit().
2370  *
2371  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2372  *    g_strfreev() to free it.
2373  */
2374 gchar**
g_strsplit(const gchar * string,const gchar * delimiter,gint max_tokens)2375 g_strsplit (const gchar *string,
2376             const gchar *delimiter,
2377             gint         max_tokens)
2378 {
2379   GSList *string_list = NULL, *slist;
2380   gchar **str_array, *s;
2381   guint n = 0;
2382   const gchar *remainder;
2383 
2384   g_return_val_if_fail (string != NULL, NULL);
2385   g_return_val_if_fail (delimiter != NULL, NULL);
2386   g_return_val_if_fail (delimiter[0] != '\0', NULL);
2387 
2388   if (max_tokens < 1)
2389     max_tokens = G_MAXINT;
2390 
2391   remainder = string;
2392   s = strstr (remainder, delimiter);
2393   if (s)
2394     {
2395       gsize delimiter_len = strlen (delimiter);
2396 
2397       while (--max_tokens && s)
2398         {
2399           gsize len;
2400 
2401           len = s - remainder;
2402           string_list = g_slist_prepend (string_list,
2403                                          g_strndup (remainder, len));
2404           n++;
2405           remainder = s + delimiter_len;
2406           s = strstr (remainder, delimiter);
2407         }
2408     }
2409   if (*string)
2410     {
2411       n++;
2412       string_list = g_slist_prepend (string_list, g_strdup (remainder));
2413     }
2414 
2415   str_array = g_new (gchar*, n + 1);
2416 
2417   str_array[n--] = NULL;
2418   for (slist = string_list; slist; slist = slist->next)
2419     str_array[n--] = slist->data;
2420 
2421   g_slist_free (string_list);
2422 
2423   return str_array;
2424 }
2425 
2426 /**
2427  * g_strsplit_set:
2428  * @string: The string to be tokenized
2429  * @delimiters: A nul-terminated string containing bytes that are used
2430  *     to split the string.
2431  * @max_tokens: The maximum number of tokens to split @string into.
2432  *     If this is less than 1, the string is split completely
2433  *
2434  * Splits @string into a number of tokens not containing any of the characters
2435  * in @delimiter. A token is the (possibly empty) longest string that does not
2436  * contain any of the characters in @delimiters. If @max_tokens is reached, the
2437  * remainder is appended to the last token.
2438  *
2439  * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2440  * %NULL-terminated vector containing the three strings "abc", "def",
2441  * and "ghi".
2442  *
2443  * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2444  * vector containing the four strings "", "def", "ghi", and "".
2445  *
2446  * As a special case, the result of splitting the empty string "" is an empty
2447  * vector, not a vector containing a single string. The reason for this
2448  * special case is that being able to represent an empty vector is typically
2449  * more useful than consistent handling of empty elements. If you do need
2450  * to represent empty elements, you'll need to check for the empty string
2451  * before calling g_strsplit_set().
2452  *
2453  * Note that this function works on bytes not characters, so it can't be used
2454  * to delimit UTF-8 strings for anything but ASCII characters.
2455  *
2456  * Returns: a newly-allocated %NULL-terminated array of strings. Use
2457  *    g_strfreev() to free it.
2458  *
2459  * Since: 2.4
2460  **/
2461 gchar **
g_strsplit_set(const gchar * string,const gchar * delimiters,gint max_tokens)2462 g_strsplit_set (const gchar *string,
2463                 const gchar *delimiters,
2464                 gint         max_tokens)
2465 {
2466   gboolean delim_table[256];
2467   GSList *tokens, *list;
2468   gint n_tokens;
2469   const gchar *s;
2470   const gchar *current;
2471   gchar *token;
2472   gchar **result;
2473 
2474   g_return_val_if_fail (string != NULL, NULL);
2475   g_return_val_if_fail (delimiters != NULL, NULL);
2476 
2477   if (max_tokens < 1)
2478     max_tokens = G_MAXINT;
2479 
2480   if (*string == '\0')
2481     {
2482       result = g_new (char *, 1);
2483       result[0] = NULL;
2484       return result;
2485     }
2486 
2487   memset (delim_table, FALSE, sizeof (delim_table));
2488   for (s = delimiters; *s != '\0'; ++s)
2489     delim_table[*(guchar *)s] = TRUE;
2490 
2491   tokens = NULL;
2492   n_tokens = 0;
2493 
2494   s = current = string;
2495   while (*s != '\0')
2496     {
2497       if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2498         {
2499           token = g_strndup (current, s - current);
2500           tokens = g_slist_prepend (tokens, token);
2501           ++n_tokens;
2502 
2503           current = s + 1;
2504         }
2505 
2506       ++s;
2507     }
2508 
2509   token = g_strndup (current, s - current);
2510   tokens = g_slist_prepend (tokens, token);
2511   ++n_tokens;
2512 
2513   result = g_new (gchar *, n_tokens + 1);
2514 
2515   result[n_tokens] = NULL;
2516   for (list = tokens; list != NULL; list = list->next)
2517     result[--n_tokens] = list->data;
2518 
2519   g_slist_free (tokens);
2520 
2521   return result;
2522 }
2523 
2524 /**
2525  * GStrv:
2526  *
2527  * A typedef alias for gchar**. This is mostly useful when used together with
2528  * g_auto().
2529  */
2530 
2531 /**
2532  * g_strfreev:
2533  * @str_array: (nullable): a %NULL-terminated array of strings to free
2534  *
2535  * Frees a %NULL-terminated array of strings, as well as each
2536  * string it contains.
2537  *
2538  * If @str_array is %NULL, this function simply returns.
2539  */
2540 void
g_strfreev(gchar ** str_array)2541 g_strfreev (gchar **str_array)
2542 {
2543   if (str_array)
2544     {
2545       int i;
2546 
2547       for (i = 0; str_array[i] != NULL; i++)
2548         g_free (str_array[i]);
2549 
2550       g_free (str_array);
2551     }
2552 }
2553 
2554 /**
2555  * g_strdupv:
2556  * @str_array: (nullable): a %NULL-terminated array of strings
2557  *
2558  * Copies %NULL-terminated array of strings. The copy is a deep copy;
2559  * the new array should be freed by first freeing each string, then
2560  * the array itself. g_strfreev() does this for you. If called
2561  * on a %NULL value, g_strdupv() simply returns %NULL.
2562  *
2563  * Returns: (nullable): a new %NULL-terminated array of strings.
2564  */
2565 gchar**
g_strdupv(gchar ** str_array)2566 g_strdupv (gchar **str_array)
2567 {
2568   if (str_array)
2569     {
2570       gint i;
2571       gchar **retval;
2572 
2573       i = 0;
2574       while (str_array[i])
2575         ++i;
2576 
2577       retval = g_new (gchar*, i + 1);
2578 
2579       i = 0;
2580       while (str_array[i])
2581         {
2582           retval[i] = g_strdup (str_array[i]);
2583           ++i;
2584         }
2585       retval[i] = NULL;
2586 
2587       return retval;
2588     }
2589   else
2590     return NULL;
2591 }
2592 
2593 /**
2594  * g_strjoinv:
2595  * @separator: (nullable): a string to insert between each of the
2596  *     strings, or %NULL
2597  * @str_array: a %NULL-terminated array of strings to join
2598  *
2599  * Joins a number of strings together to form one long string, with the
2600  * optional @separator inserted between each of them. The returned string
2601  * should be freed with g_free().
2602  *
2603  * If @str_array has no items, the return value will be an
2604  * empty string. If @str_array contains a single item, @separator will not
2605  * appear in the resulting string.
2606  *
2607  * Returns: a newly-allocated string containing all of the strings joined
2608  *     together, with @separator between them
2609  */
2610 gchar*
g_strjoinv(const gchar * separator,gchar ** str_array)2611 g_strjoinv (const gchar  *separator,
2612             gchar       **str_array)
2613 {
2614   gchar *string;
2615   gchar *ptr;
2616 
2617   g_return_val_if_fail (str_array != NULL, NULL);
2618 
2619   if (separator == NULL)
2620     separator = "";
2621 
2622   if (*str_array)
2623     {
2624       gint i;
2625       gsize len;
2626       gsize separator_len;
2627 
2628       separator_len = strlen (separator);
2629       /* First part, getting length */
2630       len = 1 + strlen (str_array[0]);
2631       for (i = 1; str_array[i] != NULL; i++)
2632         len += strlen (str_array[i]);
2633       len += separator_len * (i - 1);
2634 
2635       /* Second part, building string */
2636       string = g_new (gchar, len);
2637       ptr = g_stpcpy (string, *str_array);
2638       for (i = 1; str_array[i] != NULL; i++)
2639         {
2640           ptr = g_stpcpy (ptr, separator);
2641           ptr = g_stpcpy (ptr, str_array[i]);
2642         }
2643       }
2644   else
2645     string = g_strdup ("");
2646 
2647   return string;
2648 }
2649 
2650 /**
2651  * g_strjoin:
2652  * @separator: (nullable): a string to insert between each of the
2653  *     strings, or %NULL
2654  * @...: a %NULL-terminated list of strings to join
2655  *
2656  * Joins a number of strings together to form one long string, with the
2657  * optional @separator inserted between each of them. The returned string
2658  * should be freed with g_free().
2659  *
2660  * Returns: a newly-allocated string containing all of the strings joined
2661  *     together, with @separator between them
2662  */
2663 gchar*
g_strjoin(const gchar * separator,...)2664 g_strjoin (const gchar *separator,
2665            ...)
2666 {
2667   gchar *string, *s;
2668   va_list args;
2669   gsize len;
2670   gsize separator_len;
2671   gchar *ptr;
2672 
2673   if (separator == NULL)
2674     separator = "";
2675 
2676   separator_len = strlen (separator);
2677 
2678   va_start (args, separator);
2679 
2680   s = va_arg (args, gchar*);
2681 
2682   if (s)
2683     {
2684       /* First part, getting length */
2685       len = 1 + strlen (s);
2686 
2687       s = va_arg (args, gchar*);
2688       while (s)
2689         {
2690           len += separator_len + strlen (s);
2691           s = va_arg (args, gchar*);
2692         }
2693       va_end (args);
2694 
2695       /* Second part, building string */
2696       string = g_new (gchar, len);
2697 
2698       va_start (args, separator);
2699 
2700       s = va_arg (args, gchar*);
2701       ptr = g_stpcpy (string, s);
2702 
2703       s = va_arg (args, gchar*);
2704       while (s)
2705         {
2706           ptr = g_stpcpy (ptr, separator);
2707           ptr = g_stpcpy (ptr, s);
2708           s = va_arg (args, gchar*);
2709         }
2710     }
2711   else
2712     string = g_strdup ("");
2713 
2714   va_end (args);
2715 
2716   return string;
2717 }
2718 
2719 
2720 /**
2721  * g_strstr_len:
2722  * @haystack: a string
2723  * @haystack_len: the maximum length of @haystack. Note that -1 is
2724  *     a valid length, if @haystack is nul-terminated, meaning it will
2725  *     search through the whole string.
2726  * @needle: the string to search for
2727  *
2728  * Searches the string @haystack for the first occurrence
2729  * of the string @needle, limiting the length of the search
2730  * to @haystack_len.
2731  *
2732  * Returns: a pointer to the found occurrence, or
2733  *    %NULL if not found.
2734  */
2735 gchar *
g_strstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2736 g_strstr_len (const gchar *haystack,
2737               gssize       haystack_len,
2738               const gchar *needle)
2739 {
2740   g_return_val_if_fail (haystack != NULL, NULL);
2741   g_return_val_if_fail (needle != NULL, NULL);
2742 
2743   if (haystack_len < 0)
2744     return strstr (haystack, needle);
2745   else
2746     {
2747       const gchar *p = haystack;
2748       gsize needle_len = strlen (needle);
2749       gsize haystack_len_unsigned = haystack_len;
2750       const gchar *end;
2751       gsize i;
2752 
2753       if (needle_len == 0)
2754         return (gchar *)haystack;
2755 
2756       if (haystack_len_unsigned < needle_len)
2757         return NULL;
2758 
2759       end = haystack + haystack_len - needle_len;
2760 
2761       while (p <= end && *p)
2762         {
2763           for (i = 0; i < needle_len; i++)
2764             if (p[i] != needle[i])
2765               goto next;
2766 
2767           return (gchar *)p;
2768 
2769         next:
2770           p++;
2771         }
2772 
2773       return NULL;
2774     }
2775 }
2776 
2777 /**
2778  * g_strrstr:
2779  * @haystack: a nul-terminated string
2780  * @needle: the nul-terminated string to search for
2781  *
2782  * Searches the string @haystack for the last occurrence
2783  * of the string @needle.
2784  *
2785  * Returns: a pointer to the found occurrence, or
2786  *    %NULL if not found.
2787  */
2788 gchar *
g_strrstr(const gchar * haystack,const gchar * needle)2789 g_strrstr (const gchar *haystack,
2790            const gchar *needle)
2791 {
2792   gsize i;
2793   gsize needle_len;
2794   gsize haystack_len;
2795   const gchar *p;
2796 
2797   g_return_val_if_fail (haystack != NULL, NULL);
2798   g_return_val_if_fail (needle != NULL, NULL);
2799 
2800   needle_len = strlen (needle);
2801   haystack_len = strlen (haystack);
2802 
2803   if (needle_len == 0)
2804     return (gchar *)haystack;
2805 
2806   if (haystack_len < needle_len)
2807     return NULL;
2808 
2809   p = haystack + haystack_len - needle_len;
2810 
2811   while (p >= haystack)
2812     {
2813       for (i = 0; i < needle_len; i++)
2814         if (p[i] != needle[i])
2815           goto next;
2816 
2817       return (gchar *)p;
2818 
2819     next:
2820       p--;
2821     }
2822 
2823   return NULL;
2824 }
2825 
2826 /**
2827  * g_strrstr_len:
2828  * @haystack: a nul-terminated string
2829  * @haystack_len: the maximum length of @haystack
2830  * @needle: the nul-terminated string to search for
2831  *
2832  * Searches the string @haystack for the last occurrence
2833  * of the string @needle, limiting the length of the search
2834  * to @haystack_len.
2835  *
2836  * Returns: a pointer to the found occurrence, or
2837  *    %NULL if not found.
2838  */
2839 gchar *
g_strrstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2840 g_strrstr_len (const gchar *haystack,
2841                gssize        haystack_len,
2842                const gchar *needle)
2843 {
2844   g_return_val_if_fail (haystack != NULL, NULL);
2845   g_return_val_if_fail (needle != NULL, NULL);
2846 
2847   if (haystack_len < 0)
2848     return g_strrstr (haystack, needle);
2849   else
2850     {
2851       gsize needle_len = strlen (needle);
2852       const gchar *haystack_max = haystack + haystack_len;
2853       const gchar *p = haystack;
2854       gsize i;
2855 
2856       while (p < haystack_max && *p)
2857         p++;
2858 
2859       if (p < haystack + needle_len)
2860         return NULL;
2861 
2862       p -= needle_len;
2863 
2864       while (p >= haystack)
2865         {
2866           for (i = 0; i < needle_len; i++)
2867             if (p[i] != needle[i])
2868               goto next;
2869 
2870           return (gchar *)p;
2871 
2872         next:
2873           p--;
2874         }
2875 
2876       return NULL;
2877     }
2878 }
2879 
2880 
2881 /**
2882  * g_str_has_suffix:
2883  * @str: a nul-terminated string
2884  * @suffix: the nul-terminated suffix to look for
2885  *
2886  * Looks whether the string @str ends with @suffix.
2887  *
2888  * Returns: %TRUE if @str end with @suffix, %FALSE otherwise.
2889  *
2890  * Since: 2.2
2891  */
2892 gboolean
g_str_has_suffix(const gchar * str,const gchar * suffix)2893 g_str_has_suffix (const gchar *str,
2894                   const gchar *suffix)
2895 {
2896   gsize str_len;
2897   gsize suffix_len;
2898 
2899   g_return_val_if_fail (str != NULL, FALSE);
2900   g_return_val_if_fail (suffix != NULL, FALSE);
2901 
2902   str_len = strlen (str);
2903   suffix_len = strlen (suffix);
2904 
2905   if (str_len < suffix_len)
2906     return FALSE;
2907 
2908   return strcmp (str + str_len - suffix_len, suffix) == 0;
2909 }
2910 
2911 /**
2912  * g_str_has_prefix:
2913  * @str: a nul-terminated string
2914  * @prefix: the nul-terminated prefix to look for
2915  *
2916  * Looks whether the string @str begins with @prefix.
2917  *
2918  * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2919  *
2920  * Since: 2.2
2921  */
2922 gboolean
g_str_has_prefix(const gchar * str,const gchar * prefix)2923 g_str_has_prefix (const gchar *str,
2924                   const gchar *prefix)
2925 {
2926   g_return_val_if_fail (str != NULL, FALSE);
2927   g_return_val_if_fail (prefix != NULL, FALSE);
2928 
2929   return strncmp (str, prefix, strlen (prefix)) == 0;
2930 }
2931 
2932 /**
2933  * g_strv_length:
2934  * @str_array: a %NULL-terminated array of strings
2935  *
2936  * Returns the length of the given %NULL-terminated
2937  * string array @str_array. @str_array must not be %NULL.
2938  *
2939  * Returns: length of @str_array.
2940  *
2941  * Since: 2.6
2942  */
2943 guint
g_strv_length(gchar ** str_array)2944 g_strv_length (gchar **str_array)
2945 {
2946   guint i = 0;
2947 
2948   g_return_val_if_fail (str_array != NULL, 0);
2949 
2950   while (str_array[i])
2951     ++i;
2952 
2953   return i;
2954 }
2955 
2956 static void
index_add_folded(GPtrArray * array,const gchar * start,const gchar * end)2957 index_add_folded (GPtrArray   *array,
2958                   const gchar *start,
2959                   const gchar *end)
2960 {
2961   gchar *normal;
2962 
2963   normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2964 
2965   /* TODO: Invent time machine.  Converse with Mustafa Ataturk... */
2966   if (strstr (normal, "ı") || strstr (normal, "İ"))
2967     {
2968       gchar *s = normal;
2969       GString *tmp;
2970 
2971       tmp = g_string_new (NULL);
2972 
2973       while (*s)
2974         {
2975           gchar *i, *I, *e;
2976 
2977           i = strstr (s, "ı");
2978           I = strstr (s, "İ");
2979 
2980           if (!i && !I)
2981             break;
2982           else if (i && !I)
2983             e = i;
2984           else if (I && !i)
2985             e = I;
2986           else if (i < I)
2987             e = i;
2988           else
2989             e = I;
2990 
2991           g_string_append_len (tmp, s, e - s);
2992           g_string_append_c (tmp, 'i');
2993           s = g_utf8_next_char (e);
2994         }
2995 
2996       g_string_append (tmp, s);
2997       g_free (normal);
2998       normal = g_string_free (tmp, FALSE);
2999     }
3000 
3001   g_ptr_array_add (array, g_utf8_casefold (normal, -1));
3002   g_free (normal);
3003 }
3004 
3005 static gchar **
split_words(const gchar * value)3006 split_words (const gchar *value)
3007 {
3008   const gchar *start = NULL;
3009   GPtrArray *result;
3010   const gchar *s;
3011 
3012   result = g_ptr_array_new ();
3013 
3014   for (s = value; *s; s = g_utf8_next_char (s))
3015     {
3016       gunichar c = g_utf8_get_char (s);
3017 
3018       if (start == NULL)
3019         {
3020           if (g_unichar_isalnum (c) || g_unichar_ismark (c))
3021             start = s;
3022         }
3023       else
3024         {
3025           if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
3026             {
3027               index_add_folded (result, start, s);
3028               start = NULL;
3029             }
3030         }
3031     }
3032 
3033   if (start)
3034     index_add_folded (result, start, s);
3035 
3036   g_ptr_array_add (result, NULL);
3037 
3038   return (gchar **) g_ptr_array_free (result, FALSE);
3039 }
3040 
3041 /**
3042  * g_str_tokenize_and_fold:
3043  * @string: a string
3044  * @translit_locale: (nullable): the language code (like 'de' or
3045  *   'en_GB') from which @string originates
3046  * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
3047  *   return location for ASCII alternates
3048  *
3049  * Tokenises @string and performs folding on each token.
3050  *
3051  * A token is a non-empty sequence of alphanumeric characters in the
3052  * source string, separated by non-alphanumeric characters.  An
3053  * "alphanumeric" character for this purpose is one that matches
3054  * g_unichar_isalnum() or g_unichar_ismark().
3055  *
3056  * Each token is then (Unicode) normalised and case-folded.  If
3057  * @ascii_alternates is non-%NULL and some of the returned tokens
3058  * contain non-ASCII characters, ASCII alternatives will be generated.
3059  *
3060  * The number of ASCII alternatives that are generated and the method
3061  * for doing so is unspecified, but @translit_locale (if specified) may
3062  * improve the transliteration if the language of the source string is
3063  * known.
3064  *
3065  * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3066  *
3067  * Since: 2.40
3068  **/
3069 gchar **
g_str_tokenize_and_fold(const gchar * string,const gchar * translit_locale,gchar *** ascii_alternates)3070 g_str_tokenize_and_fold (const gchar   *string,
3071                          const gchar   *translit_locale,
3072                          gchar       ***ascii_alternates)
3073 {
3074   gchar **result;
3075 
3076   g_return_val_if_fail (string != NULL, NULL);
3077 
3078   if (ascii_alternates && g_str_is_ascii (string))
3079     {
3080       *ascii_alternates = g_new0 (gchar *, 0 + 1);
3081       ascii_alternates = NULL;
3082     }
3083 
3084   result = split_words (string);
3085 
3086   if (ascii_alternates)
3087     {
3088       gint i, j, n;
3089 
3090       n = g_strv_length (result);
3091       *ascii_alternates = g_new (gchar *, n + 1);
3092       j = 0;
3093 
3094       for (i = 0; i < n; i++)
3095         {
3096           if (!g_str_is_ascii (result[i]))
3097             {
3098               gchar *composed;
3099               gchar *ascii;
3100               gint k;
3101 
3102               composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3103 
3104               ascii = g_str_to_ascii (composed, translit_locale);
3105 
3106               /* Only accept strings that are now entirely alnums */
3107               for (k = 0; ascii[k]; k++)
3108                 if (!g_ascii_isalnum (ascii[k]))
3109                   break;
3110 
3111               if (ascii[k] == '\0')
3112                 /* Made it to the end... */
3113                 (*ascii_alternates)[j++] = ascii;
3114               else
3115                 g_free (ascii);
3116 
3117               g_free (composed);
3118             }
3119         }
3120 
3121       (*ascii_alternates)[j] = NULL;
3122     }
3123 
3124   return result;
3125 }
3126 
3127 /**
3128  * g_str_match_string:
3129  * @search_term: the search term from the user
3130  * @potential_hit: the text that may be a hit
3131  * @accept_alternates: %TRUE to accept ASCII alternates
3132  *
3133  * Checks if a search conducted for @search_term should match
3134  * @potential_hit.
3135  *
3136  * This function calls g_str_tokenize_and_fold() on both
3137  * @search_term and @potential_hit.  ASCII alternates are never taken
3138  * for @search_term but will be taken for @potential_hit according to
3139  * the value of @accept_alternates.
3140  *
3141  * A hit occurs when each folded token in @search_term is a prefix of a
3142  * folded token from @potential_hit.
3143  *
3144  * Depending on how you're performing the search, it will typically be
3145  * faster to call g_str_tokenize_and_fold() on each string in
3146  * your corpus and build an index on the returned folded tokens, then
3147  * call g_str_tokenize_and_fold() on the search term and
3148  * perform lookups into that index.
3149  *
3150  * As some examples, searching for ‘fred’ would match the potential hit
3151  * ‘Smith, Fred’ and also ‘Frédéric’.  Searching for ‘Fréd’ would match
3152  * ‘Frédéric’ but not ‘Frederic’ (due to the one-directional nature of
3153  * accent matching).  Searching ‘fo’ would match ‘Foo’ and ‘Bar Foo
3154  * Baz’, but not ‘SFO’ (because no word has ‘fo’ as a prefix).
3155  *
3156  * Returns: %TRUE if @potential_hit is a hit
3157  *
3158  * Since: 2.40
3159  **/
3160 gboolean
g_str_match_string(const gchar * search_term,const gchar * potential_hit,gboolean accept_alternates)3161 g_str_match_string (const gchar *search_term,
3162                     const gchar *potential_hit,
3163                     gboolean     accept_alternates)
3164 {
3165   gchar **alternates = NULL;
3166   gchar **term_tokens;
3167   gchar **hit_tokens;
3168   gboolean matched;
3169   gint i, j;
3170 
3171   g_return_val_if_fail (search_term != NULL, FALSE);
3172   g_return_val_if_fail (potential_hit != NULL, FALSE);
3173 
3174   term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3175   hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3176 
3177   matched = TRUE;
3178 
3179   for (i = 0; term_tokens[i]; i++)
3180     {
3181       for (j = 0; hit_tokens[j]; j++)
3182         if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3183           goto one_matched;
3184 
3185       if (accept_alternates)
3186         for (j = 0; alternates[j]; j++)
3187           if (g_str_has_prefix (alternates[j], term_tokens[i]))
3188             goto one_matched;
3189 
3190       matched = FALSE;
3191       break;
3192 
3193 one_matched:
3194       continue;
3195     }
3196 
3197   g_strfreev (term_tokens);
3198   g_strfreev (hit_tokens);
3199   g_strfreev (alternates);
3200 
3201   return matched;
3202 }
3203 
3204 /**
3205  * g_strv_contains:
3206  * @strv: a %NULL-terminated array of strings
3207  * @str: a string
3208  *
3209  * Checks if @strv contains @str. @strv must not be %NULL.
3210  *
3211  * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3212  *
3213  * Since: 2.44
3214  */
3215 gboolean
g_strv_contains(const gchar * const * strv,const gchar * str)3216 g_strv_contains (const gchar * const *strv,
3217                  const gchar         *str)
3218 {
3219   g_return_val_if_fail (strv != NULL, FALSE);
3220   g_return_val_if_fail (str != NULL, FALSE);
3221 
3222   for (; *strv != NULL; strv++)
3223     {
3224       if (g_str_equal (str, *strv))
3225         return TRUE;
3226     }
3227 
3228   return FALSE;
3229 }
3230 
3231 /**
3232  * g_strv_equal:
3233  * @strv1: a %NULL-terminated array of strings
3234  * @strv2: another %NULL-terminated array of strings
3235  *
3236  * Checks if @strv1 and @strv2 contain exactly the same elements in exactly the
3237  * same order. Elements are compared using g_str_equal(). To match independently
3238  * of order, sort the arrays first (using g_qsort_with_data() or similar).
3239  *
 * Two empty arrays are considered equal. Neither @strv1 nor @strv2 may be
 * %NULL.
3242  *
3243  * Returns: %TRUE if @strv1 and @strv2 are equal
3244  * Since: 2.60
3245  */
3246 gboolean
g_strv_equal(const gchar * const * strv1,const gchar * const * strv2)3247 g_strv_equal (const gchar * const *strv1,
3248               const gchar * const *strv2)
3249 {
3250   g_return_val_if_fail (strv1 != NULL, FALSE);
3251   g_return_val_if_fail (strv2 != NULL, FALSE);
3252 
3253   if (strv1 == strv2)
3254     return TRUE;
3255 
3256   for (; *strv1 != NULL && *strv2 != NULL; strv1++, strv2++)
3257     {
3258       if (!g_str_equal (*strv1, *strv2))
3259         return FALSE;
3260     }
3261 
3262   return (*strv1 == NULL && *strv2 == NULL);
3263 }
3264 
3265 static gboolean
str_has_sign(const gchar * str)3266 str_has_sign (const gchar *str)
3267 {
3268   return str[0] == '-' || str[0] == '+';
3269 }
3270 
3271 static gboolean
str_has_hex_prefix(const gchar * str)3272 str_has_hex_prefix (const gchar *str)
3273 {
3274   return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3275 }
3276 
3277 /**
3278  * g_ascii_string_to_signed:
3279  * @str: a string
3280  * @base: base of a parsed number
3281  * @min: a lower bound (inclusive)
3282  * @max: an upper bound (inclusive)
3283  * @out_num: (out) (optional): a return location for a number
3284  * @error: a return location for #GError
3285  *
3286  * A convenience function for converting a string to a signed number.
3287  *
3288  * This function assumes that @str contains only a number of the given
3289  * @base that is within inclusive bounds limited by @min and @max. If
3290  * this is true, then the converted number is stored in @out_num. An
3291  * empty string is not a valid input. A string with leading or
3292  * trailing whitespace is also an invalid input.
3293  *
3294  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3295  * not be prefixed with "0x" or "0X". Such a problem does not exist
3296  * for octal numbers, since they were usually prefixed with a zero
3297  * which does not change the value of the parsed number.
3298  *
3299  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3300  * domain. If the input is invalid, the error code will be
3301  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3302  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3303  *
3304  * See g_ascii_strtoll() if you have more complex needs such as
3305  * parsing a string which starts with a number, but then has other
3306  * characters.
3307  *
3308  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3309  *
3310  * Since: 2.54
3311  */
3312 gboolean
g_ascii_string_to_signed(const gchar * str,guint base,gint64 min,gint64 max,gint64 * out_num,GError ** error)3313 g_ascii_string_to_signed (const gchar  *str,
3314                           guint         base,
3315                           gint64        min,
3316                           gint64        max,
3317                           gint64       *out_num,
3318                           GError      **error)
3319 {
3320   gint64 number;
3321   const gchar *end_ptr = NULL;
3322   gint saved_errno = 0;
3323 
3324   g_return_val_if_fail (str != NULL, FALSE);
3325   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3326   g_return_val_if_fail (min <= max, FALSE);
3327   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3328 
3329   if (str[0] == '\0')
3330     {
3331       g_set_error_literal (error,
3332                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3333                            _("Empty string is not a number"));
3334       return FALSE;
3335     }
3336 
3337   errno = 0;
3338   number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3339   saved_errno = errno;
3340 
3341   if (/* We do not allow leading whitespace, but g_ascii_strtoll
3342        * accepts it and just skips it, so we need to check for it
3343        * ourselves.
3344        */
3345       g_ascii_isspace (str[0]) ||
3346       /* We don't support hexadecimal numbers prefixed with 0x or
3347        * 0X.
3348        */
3349       (base == 16 &&
3350        (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3351       (saved_errno != 0 && saved_errno != ERANGE) ||
3352       end_ptr == NULL ||
3353       *end_ptr != '\0')
3354     {
3355       g_set_error (error,
3356                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3357                    _("“%s” is not a signed number"), str);
3358       return FALSE;
3359     }
3360   if (saved_errno == ERANGE || number < min || number > max)
3361     {
3362       gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3363       gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3364 
3365       g_set_error (error,
3366                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3367                    _("Number “%s” is out of bounds [%s, %s]"),
3368                    str, min_str, max_str);
3369       g_free (min_str);
3370       g_free (max_str);
3371       return FALSE;
3372     }
3373   if (out_num != NULL)
3374     *out_num = number;
3375   return TRUE;
3376 }
3377 
3378 /**
3379  * g_ascii_string_to_unsigned:
3380  * @str: a string
3381  * @base: base of a parsed number
3382  * @min: a lower bound (inclusive)
3383  * @max: an upper bound (inclusive)
3384  * @out_num: (out) (optional): a return location for a number
3385  * @error: a return location for #GError
3386  *
3387  * A convenience function for converting a string to an unsigned number.
3388  *
3389  * This function assumes that @str contains only a number of the given
3390  * @base that is within inclusive bounds limited by @min and @max. If
3391  * this is true, then the converted number is stored in @out_num. An
3392  * empty string is not a valid input. A string with leading or
3393  * trailing whitespace is also an invalid input. A string with a leading sign
3394  * (`-` or `+`) is not a valid input for the unsigned parser.
3395  *
3396  * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3397  * not be prefixed with "0x" or "0X". Such a problem does not exist
3398  * for octal numbers, since they were usually prefixed with a zero
3399  * which does not change the value of the parsed number.
3400  *
3401  * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3402  * domain. If the input is invalid, the error code will be
3403  * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3404  * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3405  *
3406  * See g_ascii_strtoull() if you have more complex needs such as
3407  * parsing a string which starts with a number, but then has other
3408  * characters.
3409  *
3410  * Returns: %TRUE if @str was a number, otherwise %FALSE.
3411  *
3412  * Since: 2.54
3413  */
3414 gboolean
g_ascii_string_to_unsigned(const gchar * str,guint base,guint64 min,guint64 max,guint64 * out_num,GError ** error)3415 g_ascii_string_to_unsigned (const gchar  *str,
3416                             guint         base,
3417                             guint64       min,
3418                             guint64       max,
3419                             guint64      *out_num,
3420                             GError      **error)
3421 {
3422   guint64 number;
3423   const gchar *end_ptr = NULL;
3424   gint saved_errno = 0;
3425 
3426   g_return_val_if_fail (str != NULL, FALSE);
3427   g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3428   g_return_val_if_fail (min <= max, FALSE);
3429   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3430 
3431   if (str[0] == '\0')
3432     {
3433       g_set_error_literal (error,
3434                            G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3435                            _("Empty string is not a number"));
3436       return FALSE;
3437     }
3438 
3439   errno = 0;
3440   number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3441   saved_errno = errno;
3442 
3443   if (/* We do not allow leading whitespace, but g_ascii_strtoull
3444        * accepts it and just skips it, so we need to check for it
3445        * ourselves.
3446        */
3447       g_ascii_isspace (str[0]) ||
3448       /* Unsigned number should have no sign.
3449        */
3450       str_has_sign (str) ||
3451       /* We don't support hexadecimal numbers prefixed with 0x or
3452        * 0X.
3453        */
3454       (base == 16 && str_has_hex_prefix (str)) ||
3455       (saved_errno != 0 && saved_errno != ERANGE) ||
3456       end_ptr == NULL ||
3457       *end_ptr != '\0')
3458     {
3459       g_set_error (error,
3460                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3461                    _("“%s” is not an unsigned number"), str);
3462       return FALSE;
3463     }
3464   if (saved_errno == ERANGE || number < min || number > max)
3465     {
3466       gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3467       gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3468 
3469       g_set_error (error,
3470                    G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3471                    _("Number “%s” is out of bounds [%s, %s]"),
3472                    str, min_str, max_str);
3473       g_free (min_str);
3474       g_free (max_str);
3475       return FALSE;
3476     }
3477   if (out_num != NULL)
3478     *out_num = number;
3479   return TRUE;
3480 }
3481 
/* Quark definition for the error domain used by g_ascii_string_to_signed()
 * and g_ascii_string_to_unsigned() above.
 * NOTE(review): presumably this expands to g_number_parser_error_quark(),
 * the function behind G_NUMBER_PARSER_ERROR, with the first argument as the
 * quark's string name — confirm against the G_DEFINE_QUARK definition.
 */
3482 G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)
3483