1 /* GLIB - Library of useful routines for C programming
2 * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18 /*
19 * Modified by the GLib Team and others 1997-2000. See the AUTHORS
20 * file for a list of people on the GLib Team. See the ChangeLog
21 * files for a list of changes. These files are distributed with
22 * GLib at ftp://ftp.gtk.org/pub/gtk/.
23 */
24
25 /*
26 * MT safe
27 */
28
29 #include "config.h"
30
31 #include <stdarg.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <locale.h>
35 #include <string.h>
36 #include <locale.h>
37 #include <errno.h>
38 #include <garray.h>
39 #include <ctype.h> /* For tolower() */
40
41 #ifdef HAVE_XLOCALE_H
42 /* Needed on BSD/OS X for e.g. strtod_l */
43 #include <xlocale.h>
44 #endif
45
46 #ifdef G_OS_WIN32
47 #include <windows.h>
48 #endif
49
50 /* do not include <unistd.h> here, it may interfere with g_strsignal() */
51
52 #include "gstrfuncs.h"
53
54 #include "gprintf.h"
55 #include "gprintfint.h"
56 #include "glibintl.h"
57
58
59 /**
60 * SECTION:string_utils
61 * @title: String Utility Functions
62 * @short_description: various string-related functions
63 *
64 * This section describes a number of utility functions for creating,
65 * duplicating, and manipulating strings.
66 *
67 * Note that the functions g_printf(), g_fprintf(), g_sprintf(),
68 * g_vprintf(), g_vfprintf(), g_vsprintf() and g_vasprintf()
69 * are declared in the header `gprintf.h` which is not included in `glib.h`
70 * (otherwise using `glib.h` would drag in `stdio.h`), so you'll have to
71 * explicitly include `<glib/gprintf.h>` in order to use the GLib
72 * printf() functions.
73 *
74 * ## String precision pitfalls # {#string-precision}
75 *
76 * While you may use the printf() functions to format UTF-8 strings,
77 * notice that the precision of a \%Ns parameter is interpreted
78 * as the number of bytes, not characters to print. On top of that,
79 * the GNU libc implementation of the printf() functions has the
80 * "feature" that it checks that the string given for the \%Ns
81 * parameter consists of a whole number of characters in the current
82 * encoding. So, unless you are sure you are always going to be in an
 * UTF-8 locale or you know your text is restricted to ASCII, avoid
84 * using \%Ns. If your intention is to format strings for a
85 * certain number of columns, then \%Ns is not a correct solution
86 * anyway, since it fails to take wide characters (see g_unichar_iswide())
87 * into account.
88 *
89 * Note also that there are various printf() parameters which are platform
90 * dependent. GLib provides platform independent macros for these parameters
91 * which should be used instead. A common example is %G_GUINT64_FORMAT, which
92 * should be used instead of `%llu` or similar parameters for formatting
93 * 64-bit integers. These macros are all named `G_*_FORMAT`; see
94 * [Basic Types][glib-Basic-Types].
95 */
96
97 /**
98 * g_ascii_isalnum:
99 * @c: any character
100 *
101 * Determines whether a character is alphanumeric.
102 *
103 * Unlike the standard C library isalnum() function, this only
104 * recognizes standard ASCII letters and ignores the locale,
105 * returning %FALSE for all non-ASCII characters. Also, unlike
106 * the standard library function, this takes a char, not an int,
107 * so don't call it on %EOF, but no need to cast to #guchar before
108 * passing a possibly non-ASCII character in.
109 *
110 * Returns: %TRUE if @c is an ASCII alphanumeric character
111 */
112
113 /**
114 * g_ascii_isalpha:
115 * @c: any character
116 *
117 * Determines whether a character is alphabetic (i.e. a letter).
118 *
119 * Unlike the standard C library isalpha() function, this only
120 * recognizes standard ASCII letters and ignores the locale,
121 * returning %FALSE for all non-ASCII characters. Also, unlike
122 * the standard library function, this takes a char, not an int,
123 * so don't call it on %EOF, but no need to cast to #guchar before
124 * passing a possibly non-ASCII character in.
125 *
126 * Returns: %TRUE if @c is an ASCII alphabetic character
127 */
128
129 /**
130 * g_ascii_iscntrl:
131 * @c: any character
132 *
133 * Determines whether a character is a control character.
134 *
135 * Unlike the standard C library iscntrl() function, this only
136 * recognizes standard ASCII control characters and ignores the
137 * locale, returning %FALSE for all non-ASCII characters. Also,
138 * unlike the standard library function, this takes a char, not
139 * an int, so don't call it on %EOF, but no need to cast to #guchar
140 * before passing a possibly non-ASCII character in.
141 *
142 * Returns: %TRUE if @c is an ASCII control character.
143 */
144
145 /**
146 * g_ascii_isdigit:
147 * @c: any character
148 *
 * Determines whether a character is a digit (0-9).
150 *
151 * Unlike the standard C library isdigit() function, this takes
152 * a char, not an int, so don't call it on %EOF, but no need to
153 * cast to #guchar before passing a possibly non-ASCII character in.
154 *
155 * Returns: %TRUE if @c is an ASCII digit.
156 */
157
158 /**
159 * g_ascii_isgraph:
160 * @c: any character
161 *
162 * Determines whether a character is a printing character and not a space.
163 *
164 * Unlike the standard C library isgraph() function, this only
165 * recognizes standard ASCII characters and ignores the locale,
166 * returning %FALSE for all non-ASCII characters. Also, unlike
167 * the standard library function, this takes a char, not an int,
168 * so don't call it on %EOF, but no need to cast to #guchar before
169 * passing a possibly non-ASCII character in.
170 *
171 * Returns: %TRUE if @c is an ASCII printing character other than space.
172 */
173
174 /**
175 * g_ascii_islower:
176 * @c: any character
177 *
178 * Determines whether a character is an ASCII lower case letter.
179 *
180 * Unlike the standard C library islower() function, this only
181 * recognizes standard ASCII letters and ignores the locale,
182 * returning %FALSE for all non-ASCII characters. Also, unlike
183 * the standard library function, this takes a char, not an int,
184 * so don't call it on %EOF, but no need to worry about casting
185 * to #guchar before passing a possibly non-ASCII character in.
186 *
187 * Returns: %TRUE if @c is an ASCII lower case letter
188 */
189
190 /**
191 * g_ascii_isprint:
192 * @c: any character
193 *
194 * Determines whether a character is a printing character.
195 *
196 * Unlike the standard C library isprint() function, this only
197 * recognizes standard ASCII characters and ignores the locale,
198 * returning %FALSE for all non-ASCII characters. Also, unlike
199 * the standard library function, this takes a char, not an int,
200 * so don't call it on %EOF, but no need to cast to #guchar before
201 * passing a possibly non-ASCII character in.
202 *
203 * Returns: %TRUE if @c is an ASCII printing character.
204 */
205
206 /**
207 * g_ascii_ispunct:
208 * @c: any character
209 *
210 * Determines whether a character is a punctuation character.
211 *
212 * Unlike the standard C library ispunct() function, this only
 * recognizes standard ASCII punctuation and ignores the locale,
214 * returning %FALSE for all non-ASCII characters. Also, unlike
215 * the standard library function, this takes a char, not an int,
216 * so don't call it on %EOF, but no need to cast to #guchar before
217 * passing a possibly non-ASCII character in.
218 *
219 * Returns: %TRUE if @c is an ASCII punctuation character.
220 */
221
222 /**
223 * g_ascii_isspace:
224 * @c: any character
225 *
226 * Determines whether a character is a white-space character.
227 *
228 * Unlike the standard C library isspace() function, this only
229 * recognizes standard ASCII white-space and ignores the locale,
230 * returning %FALSE for all non-ASCII characters. Also, unlike
231 * the standard library function, this takes a char, not an int,
232 * so don't call it on %EOF, but no need to cast to #guchar before
233 * passing a possibly non-ASCII character in.
234 *
235 * Returns: %TRUE if @c is an ASCII white-space character
236 */
237
238 /**
239 * g_ascii_isupper:
240 * @c: any character
241 *
242 * Determines whether a character is an ASCII upper case letter.
243 *
244 * Unlike the standard C library isupper() function, this only
245 * recognizes standard ASCII letters and ignores the locale,
246 * returning %FALSE for all non-ASCII characters. Also, unlike
247 * the standard library function, this takes a char, not an int,
248 * so don't call it on %EOF, but no need to worry about casting
249 * to #guchar before passing a possibly non-ASCII character in.
250 *
251 * Returns: %TRUE if @c is an ASCII upper case letter
252 */
253
254 /**
255 * g_ascii_isxdigit:
256 * @c: any character
257 *
258 * Determines whether a character is a hexadecimal-digit character.
259 *
260 * Unlike the standard C library isxdigit() function, this takes
261 * a char, not an int, so don't call it on %EOF, but no need to
262 * cast to #guchar before passing a possibly non-ASCII character in.
263 *
264 * Returns: %TRUE if @c is an ASCII hexadecimal-digit character.
265 */
266
267 /**
268 * G_ASCII_DTOSTR_BUF_SIZE:
269 *
270 * A good size for a buffer to be passed into g_ascii_dtostr().
271 * It is guaranteed to be enough for all output of that function
272 * on systems with 64bit IEEE-compatible doubles.
273 *
274 * The typical usage would be something like:
275 * |[<!-- language="C" -->
276 * char buf[G_ASCII_DTOSTR_BUF_SIZE];
277 *
278 * fprintf (out, "value=%s\n", g_ascii_dtostr (buf, sizeof (buf), value));
279 * ]|
280 */
281
282 /**
283 * g_strstrip:
284 * @string: a string to remove the leading and trailing whitespace from
285 *
286 * Removes leading and trailing whitespace from a string.
287 * See g_strchomp() and g_strchug().
288 *
289 * Returns: @string
290 */
291
292 /**
293 * G_STR_DELIMITERS:
294 *
295 * The standard delimiters, used in g_strdelimit().
296 */
297
/* Per-byte classification bitmasks, indexed by the unsigned byte value.
 * Only the first 128 entries (the ASCII range, eight per row) are spelled
 * out; the remaining 128 rely on implicit zero initialisation, so every
 * non-ASCII byte has no flags set.
 *
 * NOTE(review): the meaning of the individual bits is not defined in this
 * file; presumably they match the flag values tested by the g_ascii_is*()
 * macros in gstrfuncs.h — confirm there before changing any entry.
 */
static const guint16 ascii_table_data[256] = {
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
  0x004, 0x104, 0x104, 0x004, 0x104, 0x104, 0x004, 0x004,
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
  0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004, 0x004,
  0x140, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459, 0x459,
  0x459, 0x459, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  0x0d0, 0x653, 0x653, 0x653, 0x653, 0x653, 0x653, 0x253,
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
  0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253, 0x253,
  0x253, 0x253, 0x253, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x0d0,
  0x0d0, 0x473, 0x473, 0x473, 0x473, 0x473, 0x473, 0x073,
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
  0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073, 0x073,
  0x073, 0x073, 0x073, 0x0d0, 0x0d0, 0x0d0, 0x0d0, 0x004
  /* the upper 128 are all zeroes */
};

/* Externally visible, read-only pointer to the classification table above. */
const guint16 * const g_ascii_table = ascii_table_data;
319
320 #if defined (HAVE_NEWLOCALE) && \
321 defined (HAVE_USELOCALE) && \
322 defined (HAVE_STRTOD_L) && \
323 defined (HAVE_STRTOULL_L) && \
324 defined (HAVE_STRTOLL_L)
325 #define USE_XLOCALE 1
326 #endif
327
328 #ifdef USE_XLOCALE
329 static locale_t
get_C_locale(void)330 get_C_locale (void)
331 {
332 static gsize initialized = FALSE;
333 static locale_t C_locale = NULL;
334
335 if (g_once_init_enter (&initialized))
336 {
337 C_locale = newlocale (LC_ALL_MASK, "C", NULL);
338 g_once_init_leave (&initialized, TRUE);
339 }
340
341 return C_locale;
342 }
343 #endif
344
345 /**
346 * g_strdup:
347 * @str: (nullable): the string to duplicate
348 *
349 * Duplicates a string. If @str is %NULL it returns %NULL.
350 * The returned string should be freed with g_free()
351 * when no longer needed.
352 *
353 * Returns: a newly-allocated copy of @str
354 */
355 gchar*
g_strdup(const gchar * str)356 g_strdup (const gchar *str)
357 {
358 gchar *new_str;
359 gsize length;
360
361 if (str)
362 {
363 length = strlen (str) + 1;
364 new_str = g_new (char, length);
365 memcpy (new_str, str, length);
366 }
367 else
368 new_str = NULL;
369
370 return new_str;
371 }
372
373 /**
374 * g_memdup:
375 * @mem: the memory to copy.
376 * @byte_size: the number of bytes to copy.
377 *
 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
 * from @mem. If @mem is %NULL or @byte_size is 0 it returns %NULL.
 *
 * Returns: a pointer to the newly-allocated copy of the memory, or %NULL if
 *   @mem is %NULL or @byte_size is 0.
383 * Deprecated: 2.68: Use g_memdup2() instead, as it accepts a #gsize argument
384 * for @byte_size, avoiding the possibility of overflow in a #gsize → #guint
385 * conversion
386 */
387 gpointer
g_memdup(gconstpointer mem,guint byte_size)388 g_memdup (gconstpointer mem,
389 guint byte_size)
390 {
391 gpointer new_mem;
392
393 if (mem && byte_size != 0)
394 {
395 new_mem = g_malloc (byte_size);
396 memcpy (new_mem, mem, byte_size);
397 }
398 else
399 new_mem = NULL;
400
401 return new_mem;
402 }
403
404 /**
405 * g_memdup2:
406 * @mem: (nullable): the memory to copy.
407 * @byte_size: the number of bytes to copy.
408 *
 * Allocates @byte_size bytes of memory, and copies @byte_size bytes into it
 * from @mem. If @mem is %NULL or @byte_size is 0 it returns %NULL.
 *
 * This replaces g_memdup(), which was prone to integer overflows when
 * converting the argument from a #gsize to a #guint.
 *
 * Returns: (nullable): a pointer to the newly-allocated copy of the memory,
 *   or %NULL if @mem is %NULL or @byte_size is 0.
417 * Since: 2.68
418 */
419 gpointer
g_memdup2(gconstpointer mem,gsize byte_size)420 g_memdup2 (gconstpointer mem,
421 gsize byte_size)
422 {
423 gpointer new_mem;
424
425 if (mem && byte_size != 0)
426 {
427 new_mem = g_malloc (byte_size);
428 memcpy (new_mem, mem, byte_size);
429 }
430 else
431 new_mem = NULL;
432
433 return new_mem;
434 }
435
436 /**
437 * g_strndup:
438 * @str: the string to duplicate
439 * @n: the maximum number of bytes to copy from @str
440 *
441 * Duplicates the first @n bytes of a string, returning a newly-allocated
442 * buffer @n + 1 bytes long which will always be nul-terminated. If @str
443 * is less than @n bytes long the buffer is padded with nuls. If @str is
444 * %NULL it returns %NULL. The returned value should be freed when no longer
445 * needed.
446 *
447 * To copy a number of characters from a UTF-8 encoded string,
448 * use g_utf8_strncpy() instead.
449 *
450 * Returns: a newly-allocated buffer containing the first @n bytes
451 * of @str, nul-terminated
452 */
453 gchar*
g_strndup(const gchar * str,gsize n)454 g_strndup (const gchar *str,
455 gsize n)
456 {
457 gchar *new_str;
458
459 if (str)
460 {
461 new_str = g_new (gchar, n + 1);
462 strncpy (new_str, str, n);
463 new_str[n] = '\0';
464 }
465 else
466 new_str = NULL;
467
468 return new_str;
469 }
470
471 /**
472 * g_strnfill:
473 * @length: the length of the new string
474 * @fill_char: the byte to fill the string with
475 *
476 * Creates a new string @length bytes long filled with @fill_char.
477 * The returned string should be freed when no longer needed.
478 *
 * Returns: a newly-allocated string filled with @fill_char
480 */
481 gchar*
g_strnfill(gsize length,gchar fill_char)482 g_strnfill (gsize length,
483 gchar fill_char)
484 {
485 gchar *str;
486
487 str = g_new (gchar, length + 1);
488 memset (str, (guchar)fill_char, length);
489 str[length] = '\0';
490
491 return str;
492 }
493
494 /**
495 * g_stpcpy:
496 * @dest: destination buffer.
497 * @src: source string.
498 *
 * Copies a nul-terminated string into the dest buffer, including the
 * trailing nul, and returns a pointer to the trailing nul byte.
501 * This is useful for concatenating multiple strings together
502 * without having to repeatedly scan for the end.
503 *
504 * Returns: a pointer to trailing nul byte.
505 **/
506 gchar *
g_stpcpy(gchar * dest,const gchar * src)507 g_stpcpy (gchar *dest,
508 const gchar *src)
509 {
510 #ifdef HAVE_STPCPY
511 g_return_val_if_fail (dest != NULL, NULL);
512 g_return_val_if_fail (src != NULL, NULL);
513 return stpcpy (dest, src);
514 #else
515 gchar *d = dest;
516 const gchar *s = src;
517
518 g_return_val_if_fail (dest != NULL, NULL);
519 g_return_val_if_fail (src != NULL, NULL);
520 do
521 *d++ = *s;
522 while (*s++ != '\0');
523
524 return d - 1;
525 #endif
526 }
527
528 /**
529 * g_strdup_vprintf:
530 * @format: (not nullable): a standard printf() format string, but notice
531 * [string precision pitfalls][string-precision]
532 * @args: the list of parameters to insert into the format string
533 *
534 * Similar to the standard C vsprintf() function but safer, since it
535 * calculates the maximum space required and allocates memory to hold
536 * the result. The returned string should be freed with g_free() when
537 * no longer needed.
538 *
539 * The returned string is guaranteed to be non-NULL, unless @format
540 * contains `%lc` or `%ls` conversions, which can fail if no multibyte
541 * representation is available for the given character.
542 *
543 * See also g_vasprintf(), which offers the same functionality, but
544 * additionally returns the length of the allocated string.
545 *
546 * Returns: a newly-allocated string holding the result
547 */
548 gchar*
g_strdup_vprintf(const gchar * format,va_list args)549 g_strdup_vprintf (const gchar *format,
550 va_list args)
551 {
552 gchar *string = NULL;
553
554 g_vasprintf (&string, format, args);
555
556 return string;
557 }
558
559 /**
560 * g_strdup_printf:
561 * @format: (not nullable): a standard printf() format string, but notice
562 * [string precision pitfalls][string-precision]
563 * @...: the parameters to insert into the format string
564 *
565 * Similar to the standard C sprintf() function but safer, since it
566 * calculates the maximum space required and allocates memory to hold
567 * the result. The returned string should be freed with g_free() when no
568 * longer needed.
569 *
570 * The returned string is guaranteed to be non-NULL, unless @format
571 * contains `%lc` or `%ls` conversions, which can fail if no multibyte
572 * representation is available for the given character.
573 *
574 * Returns: a newly-allocated string holding the result
575 */
576 gchar*
g_strdup_printf(const gchar * format,...)577 g_strdup_printf (const gchar *format,
578 ...)
579 {
580 gchar *buffer;
581 va_list args;
582
583 va_start (args, format);
584 buffer = g_strdup_vprintf (format, args);
585 va_end (args);
586
587 return buffer;
588 }
589
590 /**
591 * g_strconcat:
592 * @string1: the first string to add, which must not be %NULL
593 * @...: a %NULL-terminated list of strings to append to the string
594 *
595 * Concatenates all of the given strings into one long string. The
596 * returned string should be freed with g_free() when no longer needed.
597 *
598 * The variable argument list must end with %NULL. If you forget the %NULL,
599 * g_strconcat() will start appending random memory junk to your string.
600 *
601 * Note that this function is usually not the right function to use to
602 * assemble a translated message from pieces, since proper translation
603 * often requires the pieces to be reordered.
604 *
605 * Returns: a newly-allocated string containing all the string arguments
606 */
607 gchar*
g_strconcat(const gchar * string1,...)608 g_strconcat (const gchar *string1, ...)
609 {
610 gsize l;
611 va_list args;
612 gchar *s;
613 gchar *concat;
614 gchar *ptr;
615
616 if (!string1)
617 return NULL;
618
619 l = 1 + strlen (string1);
620 va_start (args, string1);
621 s = va_arg (args, gchar*);
622 while (s)
623 {
624 l += strlen (s);
625 s = va_arg (args, gchar*);
626 }
627 va_end (args);
628
629 concat = g_new (gchar, l);
630 ptr = concat;
631
632 ptr = g_stpcpy (ptr, string1);
633 va_start (args, string1);
634 s = va_arg (args, gchar*);
635 while (s)
636 {
637 ptr = g_stpcpy (ptr, s);
638 s = va_arg (args, gchar*);
639 }
640 va_end (args);
641
642 return concat;
643 }
644
645 /**
646 * g_strtod:
647 * @nptr: the string to convert to a numeric value.
648 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
649 * character after the last character used in the conversion.
650 *
651 * Converts a string to a #gdouble value.
652 * It calls the standard strtod() function to handle the conversion, but
653 * if the string is not completely converted it attempts the conversion
654 * again with g_ascii_strtod(), and returns the best match.
655 *
656 * This function should seldom be used. The normal situation when reading
657 * numbers not for human consumption is to use g_ascii_strtod(). Only when
658 * you know that you must expect both locale formatted and C formatted numbers
659 * should you use this. Make sure that you don't pass strings such as comma
660 * separated lists of values, since the commas may be interpreted as a decimal
661 * point in some locales, causing unexpected results.
662 *
663 * Returns: the #gdouble value.
664 **/
665 gdouble
g_strtod(const gchar * nptr,gchar ** endptr)666 g_strtod (const gchar *nptr,
667 gchar **endptr)
668 {
669 gchar *fail_pos_1;
670 gchar *fail_pos_2;
671 gdouble val_1;
672 gdouble val_2 = 0;
673
674 g_return_val_if_fail (nptr != NULL, 0);
675
676 fail_pos_1 = NULL;
677 fail_pos_2 = NULL;
678
679 val_1 = strtod (nptr, &fail_pos_1);
680
681 if (fail_pos_1 && fail_pos_1[0] != 0)
682 val_2 = g_ascii_strtod (nptr, &fail_pos_2);
683
684 if (!fail_pos_1 || fail_pos_1[0] == 0 || fail_pos_1 >= fail_pos_2)
685 {
686 if (endptr)
687 *endptr = fail_pos_1;
688 return val_1;
689 }
690 else
691 {
692 if (endptr)
693 *endptr = fail_pos_2;
694 return val_2;
695 }
696 }
697
698 /**
699 * g_ascii_strtod:
700 * @nptr: the string to convert to a numeric value.
701 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
702 * character after the last character used in the conversion.
703 *
704 * Converts a string to a #gdouble value.
705 *
706 * This function behaves like the standard strtod() function
707 * does in the C locale. It does this without actually changing
708 * the current locale, since that would not be thread-safe.
709 * A limitation of the implementation is that this function
710 * will still accept localized versions of infinities and NANs.
711 *
712 * This function is typically used when reading configuration
713 * files or other non-user input that should be locale independent.
714 * To handle input from the user you should normally use the
715 * locale-sensitive system strtod() function.
716 *
717 * To convert from a #gdouble to a string in a locale-insensitive
718 * way, use g_ascii_dtostr().
719 *
720 * If the correct value would cause overflow, plus or minus %HUGE_VAL
721 * is returned (according to the sign of the value), and %ERANGE is
722 * stored in %errno. If the correct value would cause underflow,
723 * zero is returned and %ERANGE is stored in %errno.
724 *
725 * This function resets %errno before calling strtod() so that
726 * you can reliably detect overflow and underflow.
727 *
728 * Returns: the #gdouble value.
729 */
730 gdouble
g_ascii_strtod(const gchar * nptr,gchar ** endptr)731 g_ascii_strtod (const gchar *nptr,
732 gchar **endptr)
733 {
734 #ifdef USE_XLOCALE
735
736 g_return_val_if_fail (nptr != NULL, 0);
737
738 errno = 0;
739
740 return strtod_l (nptr, endptr, get_C_locale ());
741
742 #else
743
744 gchar *fail_pos;
745 gdouble val;
746 #ifndef __BIONIC__
747 struct lconv *locale_data;
748 #endif
749 const char *decimal_point;
750 gsize decimal_point_len;
751 const char *p, *decimal_point_pos;
752 const char *end = NULL; /* Silence gcc */
753 int strtod_errno;
754
755 g_return_val_if_fail (nptr != NULL, 0);
756
757 fail_pos = NULL;
758
759 #ifndef __BIONIC__
760 locale_data = localeconv ();
761 decimal_point = locale_data->decimal_point;
762 decimal_point_len = strlen (decimal_point);
763 #else
764 decimal_point = ".";
765 decimal_point_len = 1;
766 #endif
767
768 g_assert (decimal_point_len != 0);
769
770 decimal_point_pos = NULL;
771 end = NULL;
772
773 if (decimal_point[0] != '.' ||
774 decimal_point[1] != 0)
775 {
776 p = nptr;
777 /* Skip leading space */
778 while (g_ascii_isspace (*p))
779 p++;
780
781 /* Skip leading optional sign */
782 if (*p == '+' || *p == '-')
783 p++;
784
785 if (p[0] == '0' &&
786 (p[1] == 'x' || p[1] == 'X'))
787 {
788 p += 2;
789 /* HEX - find the (optional) decimal point */
790
791 while (g_ascii_isxdigit (*p))
792 p++;
793
794 if (*p == '.')
795 decimal_point_pos = p++;
796
797 while (g_ascii_isxdigit (*p))
798 p++;
799
800 if (*p == 'p' || *p == 'P')
801 p++;
802 if (*p == '+' || *p == '-')
803 p++;
804 while (g_ascii_isdigit (*p))
805 p++;
806
807 end = p;
808 }
809 else if (g_ascii_isdigit (*p) || *p == '.')
810 {
811 while (g_ascii_isdigit (*p))
812 p++;
813
814 if (*p == '.')
815 decimal_point_pos = p++;
816
817 while (g_ascii_isdigit (*p))
818 p++;
819
820 if (*p == 'e' || *p == 'E')
821 p++;
822 if (*p == '+' || *p == '-')
823 p++;
824 while (g_ascii_isdigit (*p))
825 p++;
826
827 end = p;
828 }
829 /* For the other cases, we need not convert the decimal point */
830 }
831
832 if (decimal_point_pos)
833 {
834 char *copy, *c;
835
836 /* We need to convert the '.' to the locale specific decimal point */
837 copy = g_malloc (end - nptr + 1 + decimal_point_len);
838
839 c = copy;
840 memcpy (c, nptr, decimal_point_pos - nptr);
841 c += decimal_point_pos - nptr;
842 memcpy (c, decimal_point, decimal_point_len);
843 c += decimal_point_len;
844 memcpy (c, decimal_point_pos + 1, end - (decimal_point_pos + 1));
845 c += end - (decimal_point_pos + 1);
846 *c = 0;
847
848 errno = 0;
849 val = strtod (copy, &fail_pos);
850 strtod_errno = errno;
851
852 if (fail_pos)
853 {
854 if (fail_pos - copy > decimal_point_pos - nptr)
855 fail_pos = (char *)nptr + (fail_pos - copy) - (decimal_point_len - 1);
856 else
857 fail_pos = (char *)nptr + (fail_pos - copy);
858 }
859
860 g_free (copy);
861
862 }
863 else if (end)
864 {
865 char *copy;
866
867 copy = g_malloc (end - (char *)nptr + 1);
868 memcpy (copy, nptr, end - nptr);
869 *(copy + (end - (char *)nptr)) = 0;
870
871 errno = 0;
872 val = strtod (copy, &fail_pos);
873 strtod_errno = errno;
874
875 if (fail_pos)
876 {
877 fail_pos = (char *)nptr + (fail_pos - copy);
878 }
879
880 g_free (copy);
881 }
882 else
883 {
884 errno = 0;
885 val = strtod (nptr, &fail_pos);
886 strtod_errno = errno;
887 }
888
889 if (endptr)
890 *endptr = fail_pos;
891
892 errno = strtod_errno;
893
894 return val;
895 #endif
896 }
897
898
899 /**
900 * g_ascii_dtostr:
901 * @buffer: A buffer to place the resulting string in
902 * @buf_len: The length of the buffer.
903 * @d: The #gdouble to convert
904 *
905 * Converts a #gdouble to a string, using the '.' as
906 * decimal point.
907 *
908 * This function generates enough precision that converting
909 * the string back using g_ascii_strtod() gives the same machine-number
910 * (on machines with IEEE compatible 64bit doubles). It is
911 * guaranteed that the size of the resulting string will never
 * be larger than %G_ASCII_DTOSTR_BUF_SIZE bytes, including the terminating
913 * nul character, which is always added.
914 *
915 * Returns: The pointer to the buffer with the converted string.
916 **/
917 gchar *
g_ascii_dtostr(gchar * buffer,gint buf_len,gdouble d)918 g_ascii_dtostr (gchar *buffer,
919 gint buf_len,
920 gdouble d)
921 {
922 return g_ascii_formatd (buffer, buf_len, "%.17g", d);
923 }
924
925 #pragma GCC diagnostic push
926 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
927
928 /**
929 * g_ascii_formatd:
930 * @buffer: A buffer to place the resulting string in
931 * @buf_len: The length of the buffer.
932 * @format: The printf()-style format to use for the
933 * code to use for converting.
934 * @d: The #gdouble to convert
935 *
936 * Converts a #gdouble to a string, using the '.' as
937 * decimal point. To format the number you pass in
938 * a printf()-style format string. Allowed conversion
939 * specifiers are 'e', 'E', 'f', 'F', 'g' and 'G'.
940 *
941 * The returned buffer is guaranteed to be nul-terminated.
942 *
943 * If you just want to want to serialize the value into a
944 * string, use g_ascii_dtostr().
945 *
946 * Returns: The pointer to the buffer with the converted string.
947 */
948 gchar *
g_ascii_formatd(gchar * buffer,gint buf_len,const gchar * format,gdouble d)949 g_ascii_formatd (gchar *buffer,
950 gint buf_len,
951 const gchar *format,
952 gdouble d)
953 {
954 #ifdef USE_XLOCALE
955 locale_t old_locale;
956
957 old_locale = uselocale (get_C_locale ());
958 _g_snprintf (buffer, buf_len, format, d);
959 uselocale (old_locale);
960
961 return buffer;
962 #else
963 #ifndef __BIONIC__
964 struct lconv *locale_data;
965 #endif
966 const char *decimal_point;
967 gsize decimal_point_len;
968 gchar *p;
969 int rest_len;
970 gchar format_char;
971
972 g_return_val_if_fail (buffer != NULL, NULL);
973 g_return_val_if_fail (format[0] == '%', NULL);
974 g_return_val_if_fail (strpbrk (format + 1, "'l%") == NULL, NULL);
975
976 format_char = format[strlen (format) - 1];
977
978 g_return_val_if_fail (format_char == 'e' || format_char == 'E' ||
979 format_char == 'f' || format_char == 'F' ||
980 format_char == 'g' || format_char == 'G',
981 NULL);
982
983 if (format[0] != '%')
984 return NULL;
985
986 if (strpbrk (format + 1, "'l%"))
987 return NULL;
988
989 if (!(format_char == 'e' || format_char == 'E' ||
990 format_char == 'f' || format_char == 'F' ||
991 format_char == 'g' || format_char == 'G'))
992 return NULL;
993
994 _g_snprintf (buffer, buf_len, format, d);
995
996 #ifndef __BIONIC__
997 locale_data = localeconv ();
998 decimal_point = locale_data->decimal_point;
999 decimal_point_len = strlen (decimal_point);
1000 #else
1001 decimal_point = ".";
1002 decimal_point_len = 1;
1003 #endif
1004
1005 g_assert (decimal_point_len != 0);
1006
1007 if (decimal_point[0] != '.' ||
1008 decimal_point[1] != 0)
1009 {
1010 p = buffer;
1011
1012 while (g_ascii_isspace (*p))
1013 p++;
1014
1015 if (*p == '+' || *p == '-')
1016 p++;
1017
1018 while (isdigit ((guchar)*p))
1019 p++;
1020
1021 if (strncmp (p, decimal_point, decimal_point_len) == 0)
1022 {
1023 *p = '.';
1024 p++;
1025 if (decimal_point_len > 1)
1026 {
1027 rest_len = strlen (p + (decimal_point_len - 1));
1028 memmove (p, p + (decimal_point_len - 1), rest_len);
1029 p[rest_len] = 0;
1030 }
1031 }
1032 }
1033
1034 return buffer;
1035 #endif
1036 }
1037 #pragma GCC diagnostic pop
1038
/* Locale-independent character helpers for the hand-written integer parser
 * in this file. They compare against character literals only, so they never
 * consult the C library's locale tables.
 *
 * Every macro evaluates its argument more than once: do not pass an
 * expression with side effects.
 */
#define ISUPPER(c)              ('A' <= (c) && (c) <= 'Z')
#define ISLOWER(c)              ('a' <= (c) && (c) <= 'z')
#define ISALPHA(c)              (ISLOWER (c) || ISUPPER (c))
#define ISSPACE(c)              ((c) == ' '  || (c) == '\t' || (c) == '\n' || \
                                 (c) == '\v' || (c) == '\f' || (c) == '\r')
#define TOUPPER(c)              (ISLOWER (c) ? (c) - ('a' - 'A') : (c))
#define TOLOWER(c)              (ISUPPER (c) ? (c) + ('a' - 'A') : (c))
1046
1047 #ifndef USE_XLOCALE
1048
/* Parses the magnitude of an integer literal in @nptr.
 *
 * @nptr:     string to parse; a NULL value trips the precondition check,
 *            which returns 0 *before* @negative is written.
 * @endptr:   if non-NULL, receives the position where parsing stopped
 *            (or @nptr when nothing was converted).
 * @base:     0 to auto-detect (0x/0X -> 16, leading 0 -> 8, else 10);
 *            1 and anything above 36 are rejected with EINVAL.
 * @negative: set to TRUE when a leading '-' was consumed, FALSE otherwise.
 *
 * Returns the unsigned magnitude; the caller applies the sign.  On
 * overflow returns G_MAXUINT64 and sets errno to ERANGE.  Returns 0 when
 * no digits could be converted.
 */
static guint64
g_parse_long_long (const gchar  *nptr,
                   const gchar **endptr,
                   guint         base,
                   gboolean     *negative)
{
  /* this code is based on the strtol(3) code from GNU libc released under
   * the GNU Lesser General Public License.
   *
   * Copyright (C) 1991,92,94,95,96,97,98,99,2000,01,02
   *        Free Software Foundation, Inc.
   */
  gboolean overflow;
  guint64 cutoff;
  guint64 cutlim;
  guint64 ui64;
  const gchar *s, *save;
  guchar c;

  g_return_val_if_fail (nptr != NULL, 0);

  *negative = FALSE;
  /* Base 0 means "auto-detect" and is handled below; base 1 and bases
   * above 36 are invalid. */
  if (base == 1 || base > 36)
    {
      errno = EINVAL;
      if (endptr)
        *endptr = nptr;
      return 0;
    }

  save = s = nptr;

  /* Skip white space.  */
  while (ISSPACE (*s))
    ++s;

  /* Empty or all-whitespace input: nothing to convert. */
  if (G_UNLIKELY (!*s))
    goto noconv;

  /* Check for a sign.  */
  if (*s == '-')
    {
      *negative = TRUE;
      ++s;
    }
  else if (*s == '+')
    ++s;

  /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
  if (*s == '0')
    {
      if ((base == 0 || base == 16) && TOUPPER (s[1]) == 'X')
        {
          /* Skip the "0x"/"0X" prefix; digits must follow for a conversion. */
          s += 2;
          base = 16;
        }
      else if (base == 0)
        base = 8;
    }
  else if (base == 0)
    base = 10;

  /* Save the pointer so we can check later if anything happened.  */
  save = s;
  /* cutoff is the largest value that can still be multiplied by base;
   * cutlim is the largest digit that may then be added to exactly cutoff. */
  cutoff = G_MAXUINT64 / base;
  cutlim = G_MAXUINT64 % base;

  overflow = FALSE;
  ui64 = 0;
  c = *s;
  for (; c; c = *++s)
    {
      /* Map the character to its digit value; stop at the first character
       * that is not a digit in the requested base. */
      if (c >= '0' && c <= '9')
        c -= '0';
      else if (ISALPHA (c))
        c = TOUPPER (c) - 'A' + 10;
      else
        break;
      if (c >= base)
        break;
      /* Check for overflow.  Once set, the remaining digits are still
       * consumed so that ENDPTR ends up past the whole number. */
      if (ui64 > cutoff || (ui64 == cutoff && c > cutlim))
        overflow = TRUE;
      else
        {
          ui64 *= base;
          ui64 += c;
        }
    }

  /* Check if anything actually happened.  */
  if (s == save)
    goto noconv;

  /* Store in ENDPTR the address of one character
     past the last character we converted.  */
  if (endptr)
    *endptr = s;

  if (G_UNLIKELY (overflow))
    {
      errno = ERANGE;
      return G_MAXUINT64;
    }

  return ui64;

 noconv:
  /* We must handle a special case here: the base is 0 or 16 and the
     first two characters are '0' and 'x', but the rest are not
     hexadecimal digits.  This is not an error case.  We return 0 and
     ENDPTR points to the `x`.  */
  if (endptr)
    {
      if (save - nptr >= 2 && TOUPPER (save[-1]) == 'X'
          && save[-2] == '0')
        *endptr = &save[-1];
      else
        /* There was no number to convert.  */
        *endptr = nptr;
    }
  return 0;
}
1172 #endif /* !USE_XLOCALE */
1173
1174 /**
1175 * g_ascii_strtoull:
1176 * @nptr: the string to convert to a numeric value.
1177 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1178 * character after the last character used in the conversion.
1179 * @base: to be used for the conversion, 2..36 or 0
1180 *
1181 * Converts a string to a #guint64 value.
1182 * This function behaves like the standard strtoull() function
1183 * does in the C locale. It does this without actually
1184 * changing the current locale, since that would not be
1185 * thread-safe.
1186 *
1187 * Note that input with a leading minus sign (`-`) is accepted, and will return
1188 * the negation of the parsed number, unless that would overflow a #guint64.
1189 * Critically, this means you cannot assume that a short fixed length input will
1190 * never result in a low return value, as the input could have a leading `-`.
1191 *
1192 * This function is typically used when reading configuration
1193 * files or other non-user input that should be locale independent.
1194 * To handle input from the user you should normally use the
1195 * locale-sensitive system strtoull() function.
1196 *
1197 * If the correct value would cause overflow, %G_MAXUINT64
1198 * is returned, and `ERANGE` is stored in `errno`.
1199 * If the base is outside the valid range, zero is returned, and
1200 * `EINVAL` is stored in `errno`.
1201 * If the string conversion fails, zero is returned, and @endptr returns
1202 * @nptr (if @endptr is non-%NULL).
1203 *
1204 * Returns: the #guint64 value or zero on error.
1205 *
1206 * Since: 2.2
1207 */
1208 guint64
g_ascii_strtoull(const gchar * nptr,gchar ** endptr,guint base)1209 g_ascii_strtoull (const gchar *nptr,
1210 gchar **endptr,
1211 guint base)
1212 {
1213 #ifdef USE_XLOCALE
1214 return strtoull_l (nptr, endptr, base, get_C_locale ());
1215 #else
1216 gboolean negative;
1217 guint64 result;
1218
1219 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1220
1221 /* Return the result of the appropriate sign. */
1222 return negative ? -result : result;
1223 #endif
1224 }
1225
1226 /**
1227 * g_ascii_strtoll:
1228 * @nptr: the string to convert to a numeric value.
1229 * @endptr: (out) (transfer none) (optional): if non-%NULL, it returns the
1230 * character after the last character used in the conversion.
1231 * @base: to be used for the conversion, 2..36 or 0
1232 *
1233 * Converts a string to a #gint64 value.
1234 * This function behaves like the standard strtoll() function
1235 * does in the C locale. It does this without actually
1236 * changing the current locale, since that would not be
1237 * thread-safe.
1238 *
1239 * This function is typically used when reading configuration
1240 * files or other non-user input that should be locale independent.
1241 * To handle input from the user you should normally use the
1242 * locale-sensitive system strtoll() function.
1243 *
1244 * If the correct value would cause overflow, %G_MAXINT64 or %G_MININT64
1245 * is returned, and `ERANGE` is stored in `errno`.
1246 * If the base is outside the valid range, zero is returned, and
1247 * `EINVAL` is stored in `errno`. If the
1248 * string conversion fails, zero is returned, and @endptr returns @nptr
1249 * (if @endptr is non-%NULL).
1250 *
1251 * Returns: the #gint64 value or zero on error.
1252 *
1253 * Since: 2.12
1254 */
1255 gint64
g_ascii_strtoll(const gchar * nptr,gchar ** endptr,guint base)1256 g_ascii_strtoll (const gchar *nptr,
1257 gchar **endptr,
1258 guint base)
1259 {
1260 #ifdef USE_XLOCALE
1261 return strtoll_l (nptr, endptr, base, get_C_locale ());
1262 #else
1263 gboolean negative;
1264 guint64 result;
1265
1266 result = g_parse_long_long (nptr, (const gchar **) endptr, base, &negative);
1267
1268 if (negative && result > (guint64) G_MININT64)
1269 {
1270 errno = ERANGE;
1271 return G_MININT64;
1272 }
1273 else if (!negative && result > (guint64) G_MAXINT64)
1274 {
1275 errno = ERANGE;
1276 return G_MAXINT64;
1277 }
1278 else if (negative)
1279 return - (gint64) result;
1280 else
1281 return (gint64) result;
1282 #endif
1283 }
1284
1285 /**
1286 * g_strerror:
1287 * @errnum: the system error number. See the standard C %errno
1288 * documentation
1289 *
1290 * Returns a string corresponding to the given error code, e.g. "no
1291 * such process". Unlike strerror(), this always returns a string in
1292 * UTF-8 encoding, and the pointer is guaranteed to remain valid for
1293 * the lifetime of the process.
1294 *
1295 * Note that the string may be translated according to the current locale.
1296 *
1297 * The value of %errno will not be changed by this function. However, it may
1298 * be changed by intermediate function calls, so you should save its value
1299 * as soon as the call returns:
1300 * |[
1301 * int saved_errno;
1302 *
1303 * ret = read (blah);
1304 * saved_errno = errno;
1305 *
1306 * g_strerror (saved_errno);
1307 * ]|
1308 *
1309 * Returns: a UTF-8 string describing the error code. If the error code
1310 * is unknown, it returns a string like "unknown error (<code>)".
1311 */
1312 const gchar *
g_strerror(gint errnum)1313 g_strerror (gint errnum)
1314 {
1315 static GHashTable *errors;
1316 G_LOCK_DEFINE_STATIC (errors);
1317 const gchar *msg;
1318 gint saved_errno = errno;
1319
1320 G_LOCK (errors);
1321 if (errors)
1322 msg = g_hash_table_lookup (errors, GINT_TO_POINTER (errnum));
1323 else
1324 {
1325 errors = g_hash_table_new (NULL, NULL);
1326 msg = NULL;
1327 }
1328
1329 if (!msg)
1330 {
1331 gchar buf[1024];
1332 GError *error = NULL;
1333
1334 #if defined(G_OS_WIN32)
1335 strerror_s (buf, sizeof (buf), errnum);
1336 msg = buf;
1337 #elif defined(HAVE_STRERROR_R)
1338 /* Match the condition in strerror_r(3) for glibc */
1339 # if defined(STRERROR_R_CHAR_P)
1340 msg = strerror_r (errnum, buf, sizeof (buf));
1341 # else
1342 (void) strerror_r (errnum, buf, sizeof (buf));
1343 msg = buf;
1344 # endif /* HAVE_STRERROR_R */
1345 #else
1346 g_strlcpy (buf, strerror (errnum), sizeof (buf));
1347 msg = buf;
1348 #endif
1349 if (!g_get_console_charset (NULL))
1350 {
1351 msg = g_locale_to_utf8 (msg, -1, NULL, NULL, &error);
1352 if (error)
1353 g_print ("%s\n", error->message);
1354 }
1355 else if (msg == (const gchar *)buf)
1356 msg = g_strdup (buf);
1357
1358 g_hash_table_insert (errors, GINT_TO_POINTER (errnum), (char *) msg);
1359 }
1360 G_UNLOCK (errors);
1361
1362 errno = saved_errno;
1363 return msg;
1364 }
1365
1366 /**
1367 * g_strsignal:
1368 * @signum: the signal number. See the `signal` documentation
1369 *
1370 * Returns a string describing the given signal, e.g. "Segmentation fault".
1371 * You should use this function in preference to strsignal(), because it
1372 * returns a string in UTF-8 encoding, and since not all platforms support
1373 * the strsignal() function.
1374 *
1375 * Returns: a UTF-8 string describing the signal. If the signal is unknown,
1376 * it returns "unknown signal (<signum>)".
1377 */
1378 const gchar *
g_strsignal(gint signum)1379 g_strsignal (gint signum)
1380 {
1381 gchar *msg;
1382 gchar *tofree;
1383 const gchar *ret;
1384
1385 msg = tofree = NULL;
1386
1387 #ifdef HAVE_STRSIGNAL
1388 msg = strsignal (signum);
1389 if (!g_get_console_charset (NULL))
1390 msg = tofree = g_locale_to_utf8 (msg, -1, NULL, NULL, NULL);
1391 #endif
1392
1393 if (!msg)
1394 msg = tofree = g_strdup_printf ("unknown signal (%d)", signum);
1395 ret = g_intern_string (msg);
1396 g_free (tofree);
1397
1398 return ret;
1399 }
1400
1401 /* Functions g_strlcpy and g_strlcat were originally developed by
1402 * Todd C. Miller <Todd.Miller@courtesan.com> to simplify writing secure code.
1403 * See http://www.openbsd.org/cgi-bin/man.cgi?query=strlcpy
1404 * for more information.
1405 */
1406
1407 #ifdef HAVE_STRLCPY
1408 /* Use the native ones, if available; they might be implemented in assembly */
1409 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1410 g_strlcpy (gchar *dest,
1411 const gchar *src,
1412 gsize dest_size)
1413 {
1414 g_return_val_if_fail (dest != NULL, 0);
1415 g_return_val_if_fail (src != NULL, 0);
1416
1417 return strlcpy (dest, src, dest_size);
1418 }
1419
1420 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1421 g_strlcat (gchar *dest,
1422 const gchar *src,
1423 gsize dest_size)
1424 {
1425 g_return_val_if_fail (dest != NULL, 0);
1426 g_return_val_if_fail (src != NULL, 0);
1427
1428 return strlcat (dest, src, dest_size);
1429 }
1430
1431 #else /* ! HAVE_STRLCPY */
1432 /**
1433 * g_strlcpy:
1434 * @dest: destination buffer
1435 * @src: source buffer
1436 * @dest_size: length of @dest in bytes
1437 *
1438 * Portability wrapper that calls strlcpy() on systems which have it,
1439 * and emulates strlcpy() otherwise. Copies @src to @dest; @dest is
1440 * guaranteed to be nul-terminated; @src must be nul-terminated;
1441 * @dest_size is the buffer size, not the number of bytes to copy.
1442 *
1443 * At most @dest_size - 1 characters will be copied. Always nul-terminates
1444 * (unless @dest_size is 0). This function does not allocate memory. Unlike
1445 * strncpy(), this function doesn't pad @dest (so it's often faster). It
1446 * returns the size of the attempted result, strlen (src), so if
1447 * @retval >= @dest_size, truncation occurred.
1448 *
1449 * Caveat: strlcpy() is supposedly more secure than strcpy() or strncpy(),
1450 * but if you really want to avoid screwups, g_strdup() is an even better
1451 * idea.
1452 *
1453 * Returns: length of @src
1454 */
1455 gsize
g_strlcpy(gchar * dest,const gchar * src,gsize dest_size)1456 g_strlcpy (gchar *dest,
1457 const gchar *src,
1458 gsize dest_size)
1459 {
1460 gchar *d = dest;
1461 const gchar *s = src;
1462 gsize n = dest_size;
1463
1464 g_return_val_if_fail (dest != NULL, 0);
1465 g_return_val_if_fail (src != NULL, 0);
1466
1467 /* Copy as many bytes as will fit */
1468 if (n != 0 && --n != 0)
1469 do
1470 {
1471 gchar c = *s++;
1472
1473 *d++ = c;
1474 if (c == 0)
1475 break;
1476 }
1477 while (--n != 0);
1478
1479 /* If not enough room in dest, add NUL and traverse rest of src */
1480 if (n == 0)
1481 {
1482 if (dest_size != 0)
1483 *d = 0;
1484 while (*s++)
1485 ;
1486 }
1487
1488 return s - src - 1; /* count does not include NUL */
1489 }
1490
1491 /**
1492 * g_strlcat:
1493 * @dest: destination buffer, already containing one nul-terminated string
1494 * @src: source buffer
1495 * @dest_size: length of @dest buffer in bytes (not length of existing string
1496 * inside @dest)
1497 *
1498 * Portability wrapper that calls strlcat() on systems which have it,
1499 * and emulates it otherwise. Appends nul-terminated @src string to @dest,
1500 * guaranteeing nul-termination for @dest. The total size of @dest won't
1501 * exceed @dest_size.
1502 *
1503 * At most @dest_size - 1 characters will be copied. Unlike strncat(),
1504 * @dest_size is the full size of dest, not the space left over. This
1505 * function does not allocate memory. It always nul-terminates (unless
1506 * @dest_size == 0 or there were no nul characters in the @dest_size
1507 * characters of dest to start with).
1508 *
1509 * Caveat: this is supposedly a more secure alternative to strcat() or
1510 * strncat(), but for real security g_strconcat() is harder to mess up.
1511 *
1512 * Returns: size of attempted result, which is MIN (dest_size, strlen
1513 * (original dest)) + strlen (src), so if retval >= dest_size,
1514 * truncation occurred.
1515 */
1516 gsize
g_strlcat(gchar * dest,const gchar * src,gsize dest_size)1517 g_strlcat (gchar *dest,
1518 const gchar *src,
1519 gsize dest_size)
1520 {
1521 gchar *d = dest;
1522 const gchar *s = src;
1523 gsize bytes_left = dest_size;
1524 gsize dlength; /* Logically, MIN (strlen (d), dest_size) */
1525
1526 g_return_val_if_fail (dest != NULL, 0);
1527 g_return_val_if_fail (src != NULL, 0);
1528
1529 /* Find the end of dst and adjust bytes left but don't go past end */
1530 while (*d != 0 && bytes_left-- != 0)
1531 d++;
1532 dlength = d - dest;
1533 bytes_left = dest_size - dlength;
1534
1535 if (bytes_left == 0)
1536 return dlength + strlen (s);
1537
1538 while (*s != 0)
1539 {
1540 if (bytes_left != 1)
1541 {
1542 *d++ = *s;
1543 bytes_left--;
1544 }
1545 s++;
1546 }
1547 *d = 0;
1548
1549 return dlength + (s - src); /* count does not include NUL */
1550 }
1551 #endif /* ! HAVE_STRLCPY */
1552
1553 /**
1554 * g_ascii_strdown:
1555 * @str: a string
1556 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1557 *
1558 * Converts all upper case ASCII letters to lower case ASCII letters.
1559 *
1560 * Returns: a newly-allocated string, with all the upper case
1561 * characters in @str converted to lower case, with semantics that
1562 * exactly match g_ascii_tolower(). (Note that this is unlike the
1563 * old g_strdown(), which modified the string in place.)
1564 */
1565 gchar*
g_ascii_strdown(const gchar * str,gssize len)1566 g_ascii_strdown (const gchar *str,
1567 gssize len)
1568 {
1569 gchar *result, *s;
1570
1571 g_return_val_if_fail (str != NULL, NULL);
1572
1573 if (len < 0)
1574 len = (gssize) strlen (str);
1575
1576 result = g_strndup (str, (gsize) len);
1577 for (s = result; *s; s++)
1578 *s = g_ascii_tolower (*s);
1579
1580 return result;
1581 }
1582
1583 /**
1584 * g_ascii_strup:
1585 * @str: a string
1586 * @len: length of @str in bytes, or -1 if @str is nul-terminated
1587 *
1588 * Converts all lower case ASCII letters to upper case ASCII letters.
1589 *
1590 * Returns: a newly allocated string, with all the lower case
1591 * characters in @str converted to upper case, with semantics that
1592 * exactly match g_ascii_toupper(). (Note that this is unlike the
1593 * old g_strup(), which modified the string in place.)
1594 */
1595 gchar*
g_ascii_strup(const gchar * str,gssize len)1596 g_ascii_strup (const gchar *str,
1597 gssize len)
1598 {
1599 gchar *result, *s;
1600
1601 g_return_val_if_fail (str != NULL, NULL);
1602
1603 if (len < 0)
1604 len = (gssize) strlen (str);
1605
1606 result = g_strndup (str, (gsize) len);
1607 for (s = result; *s; s++)
1608 *s = g_ascii_toupper (*s);
1609
1610 return result;
1611 }
1612
1613 /**
1614 * g_str_is_ascii:
1615 * @str: a string
1616 *
1617 * Determines if a string is pure ASCII. A string is pure ASCII if it
1618 * contains no bytes with the high bit set.
1619 *
1620 * Returns: %TRUE if @str is ASCII
1621 *
1622 * Since: 2.40
1623 */
1624 gboolean
g_str_is_ascii(const gchar * str)1625 g_str_is_ascii (const gchar *str)
1626 {
1627 gsize i;
1628
1629 for (i = 0; str[i]; i++)
1630 if (str[i] & 0x80)
1631 return FALSE;
1632
1633 return TRUE;
1634 }
1635
1636 /**
1637 * g_strdown:
1638 * @string: the string to convert.
1639 *
1640 * Converts a string to lower case.
1641 *
1642 * Returns: the string
1643 *
1644 * Deprecated:2.2: This function is totally broken for the reasons discussed
1645 * in the g_strncasecmp() docs - use g_ascii_strdown() or g_utf8_strdown()
1646 * instead.
1647 **/
1648 gchar*
g_strdown(gchar * string)1649 g_strdown (gchar *string)
1650 {
1651 guchar *s;
1652
1653 g_return_val_if_fail (string != NULL, NULL);
1654
1655 s = (guchar *) string;
1656
1657 while (*s)
1658 {
1659 if (isupper (*s))
1660 *s = tolower (*s);
1661 s++;
1662 }
1663
1664 return (gchar *) string;
1665 }
1666
1667 /**
1668 * g_strup:
1669 * @string: the string to convert
1670 *
1671 * Converts a string to upper case.
1672 *
1673 * Returns: the string
1674 *
1675 * Deprecated:2.2: This function is totally broken for the reasons
1676 * discussed in the g_strncasecmp() docs - use g_ascii_strup()
1677 * or g_utf8_strup() instead.
1678 */
1679 gchar*
g_strup(gchar * string)1680 g_strup (gchar *string)
1681 {
1682 guchar *s;
1683
1684 g_return_val_if_fail (string != NULL, NULL);
1685
1686 s = (guchar *) string;
1687
1688 while (*s)
1689 {
1690 if (islower (*s))
1691 *s = toupper (*s);
1692 s++;
1693 }
1694
1695 return (gchar *) string;
1696 }
1697
1698 /**
1699 * g_strreverse:
1700 * @string: the string to reverse
1701 *
1702 * Reverses all of the bytes in a string. For example,
1703 * `g_strreverse ("abcdef")` will result in "fedcba".
1704 *
1705 * Note that g_strreverse() doesn't work on UTF-8 strings
1706 * containing multibyte characters. For that purpose, use
1707 * g_utf8_strreverse().
1708 *
1709 * Returns: the same pointer passed in as @string
1710 */
1711 gchar*
g_strreverse(gchar * string)1712 g_strreverse (gchar *string)
1713 {
1714 g_return_val_if_fail (string != NULL, NULL);
1715
1716 if (*string)
1717 {
1718 gchar *h, *t;
1719
1720 h = string;
1721 t = string + strlen (string) - 1;
1722
1723 while (h < t)
1724 {
1725 gchar c;
1726
1727 c = *h;
1728 *h = *t;
1729 h++;
1730 *t = c;
1731 t--;
1732 }
1733 }
1734
1735 return string;
1736 }
1737
1738 /**
1739 * g_ascii_tolower:
1740 * @c: any character
1741 *
1742 * Convert a character to ASCII lower case.
1743 *
1744 * Unlike the standard C library tolower() function, this only
1745 * recognizes standard ASCII letters and ignores the locale, returning
1746 * all non-ASCII characters unchanged, even if they are lower case
1747 * letters in a particular character set. Also unlike the standard
1748 * library function, this takes and returns a char, not an int, so
1749 * don't call it on %EOF but no need to worry about casting to #guchar
1750 * before passing a possibly non-ASCII character in.
1751 *
1752 * Returns: the result of converting @c to lower case. If @c is
1753 * not an ASCII upper case letter, @c is returned unchanged.
1754 */
1755 gchar
g_ascii_tolower(gchar c)1756 g_ascii_tolower (gchar c)
1757 {
1758 return g_ascii_isupper (c) ? c - 'A' + 'a' : c;
1759 }
1760
1761 /**
1762 * g_ascii_toupper:
1763 * @c: any character
1764 *
1765 * Convert a character to ASCII upper case.
1766 *
1767 * Unlike the standard C library toupper() function, this only
1768 * recognizes standard ASCII letters and ignores the locale, returning
1769 * all non-ASCII characters unchanged, even if they are upper case
1770 * letters in a particular character set. Also unlike the standard
1771 * library function, this takes and returns a char, not an int, so
1772 * don't call it on %EOF but no need to worry about casting to #guchar
1773 * before passing a possibly non-ASCII character in.
1774 *
1775 * Returns: the result of converting @c to upper case. If @c is not
1776 * an ASCII lower case letter, @c is returned unchanged.
1777 */
1778 gchar
g_ascii_toupper(gchar c)1779 g_ascii_toupper (gchar c)
1780 {
1781 return g_ascii_islower (c) ? c - 'a' + 'A' : c;
1782 }
1783
1784 /**
1785 * g_ascii_digit_value:
1786 * @c: an ASCII character
1787 *
1788 * Determines the numeric value of a character as a decimal digit.
1789 * Differs from g_unichar_digit_value() because it takes a char, so
1790 * there's no worry about sign extension if characters are signed.
1791 *
1792 * Returns: If @c is a decimal digit (according to g_ascii_isdigit()),
1793 * its numeric value. Otherwise, -1.
1794 */
1795 int
g_ascii_digit_value(gchar c)1796 g_ascii_digit_value (gchar c)
1797 {
1798 if (g_ascii_isdigit (c))
1799 return c - '0';
1800 return -1;
1801 }
1802
1803 /**
1804 * g_ascii_xdigit_value:
1805 * @c: an ASCII character.
1806 *
1807 * Determines the numeric value of a character as a hexadecimal
1808 * digit. Differs from g_unichar_xdigit_value() because it takes
1809 * a char, so there's no worry about sign extension if characters
1810 * are signed.
1811 *
1812 * Returns: If @c is a hex digit (according to g_ascii_isxdigit()),
1813 * its numeric value. Otherwise, -1.
1814 */
1815 int
g_ascii_xdigit_value(gchar c)1816 g_ascii_xdigit_value (gchar c)
1817 {
1818 if (c >= 'A' && c <= 'F')
1819 return c - 'A' + 10;
1820 if (c >= 'a' && c <= 'f')
1821 return c - 'a' + 10;
1822 return g_ascii_digit_value (c);
1823 }
1824
1825 /**
1826 * g_ascii_strcasecmp:
1827 * @s1: string to compare with @s2
1828 * @s2: string to compare with @s1
1829 *
1830 * Compare two strings, ignoring the case of ASCII characters.
1831 *
1832 * Unlike the BSD strcasecmp() function, this only recognizes standard
1833 * ASCII letters and ignores the locale, treating all non-ASCII
1834 * bytes as if they are not letters.
1835 *
1836 * This function should be used only on strings that are known to be
1837 * in encodings where the bytes corresponding to ASCII letters always
1838 * represent themselves. This includes UTF-8 and the ISO-8859-*
1839 * charsets, but not for instance double-byte encodings like the
1840 * Windows Codepage 932, where the trailing bytes of double-byte
1841 * characters include all ASCII letters. If you compare two CP932
1842 * strings using this function, you will get false matches.
1843 *
1844 * Both @s1 and @s2 must be non-%NULL.
1845 *
1846 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1847 * or a positive value if @s1 > @s2.
1848 */
1849 gint
g_ascii_strcasecmp(const gchar * s1,const gchar * s2)1850 g_ascii_strcasecmp (const gchar *s1,
1851 const gchar *s2)
1852 {
1853 gint c1, c2;
1854
1855 g_return_val_if_fail (s1 != NULL, 0);
1856 g_return_val_if_fail (s2 != NULL, 0);
1857
1858 while (*s1 && *s2)
1859 {
1860 c1 = (gint)(guchar) TOLOWER (*s1);
1861 c2 = (gint)(guchar) TOLOWER (*s2);
1862 if (c1 != c2)
1863 return (c1 - c2);
1864 s1++; s2++;
1865 }
1866
1867 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1868 }
1869
1870 /**
1871 * g_ascii_strncasecmp:
1872 * @s1: string to compare with @s2
1873 * @s2: string to compare with @s1
1874 * @n: number of characters to compare
1875 *
1876 * Compare @s1 and @s2, ignoring the case of ASCII characters and any
1877 * characters after the first @n in each string.
1878 *
1879 * Unlike the BSD strcasecmp() function, this only recognizes standard
1880 * ASCII letters and ignores the locale, treating all non-ASCII
1881 * characters as if they are not letters.
1882 *
1883 * The same warning as in g_ascii_strcasecmp() applies: Use this
1884 * function only on strings known to be in encodings where bytes
1885 * corresponding to ASCII letters always represent themselves.
1886 *
1887 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1888 * or a positive value if @s1 > @s2.
1889 */
1890 gint
g_ascii_strncasecmp(const gchar * s1,const gchar * s2,gsize n)1891 g_ascii_strncasecmp (const gchar *s1,
1892 const gchar *s2,
1893 gsize n)
1894 {
1895 gint c1, c2;
1896
1897 g_return_val_if_fail (s1 != NULL, 0);
1898 g_return_val_if_fail (s2 != NULL, 0);
1899
1900 while (n && *s1 && *s2)
1901 {
1902 n -= 1;
1903 c1 = (gint)(guchar) TOLOWER (*s1);
1904 c2 = (gint)(guchar) TOLOWER (*s2);
1905 if (c1 != c2)
1906 return (c1 - c2);
1907 s1++; s2++;
1908 }
1909
1910 if (n)
1911 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
1912 else
1913 return 0;
1914 }
1915
1916 /**
1917 * g_strcasecmp:
1918 * @s1: a string
1919 * @s2: a string to compare with @s1
1920 *
1921 * A case-insensitive string comparison, corresponding to the standard
1922 * strcasecmp() function on platforms which support it.
1923 *
1924 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1925 * or a positive value if @s1 > @s2.
1926 *
1927 * Deprecated:2.2: See g_strncasecmp() for a discussion of why this
1928 * function is deprecated and how to replace it.
1929 */
1930 gint
g_strcasecmp(const gchar * s1,const gchar * s2)1931 g_strcasecmp (const gchar *s1,
1932 const gchar *s2)
1933 {
1934 #ifdef HAVE_STRCASECMP
1935 g_return_val_if_fail (s1 != NULL, 0);
1936 g_return_val_if_fail (s2 != NULL, 0);
1937
1938 return strcasecmp (s1, s2);
1939 #else
1940 gint c1, c2;
1941
1942 g_return_val_if_fail (s1 != NULL, 0);
1943 g_return_val_if_fail (s2 != NULL, 0);
1944
1945 while (*s1 && *s2)
1946 {
1947 /* According to A. Cox, some platforms have islower's that
1948 * don't work right on non-uppercase
1949 */
1950 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
1951 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
1952 if (c1 != c2)
1953 return (c1 - c2);
1954 s1++; s2++;
1955 }
1956
1957 return (((gint)(guchar) *s1) - ((gint)(guchar) *s2));
1958 #endif
1959 }
1960
1961 /**
1962 * g_strncasecmp:
1963 * @s1: a string
1964 * @s2: a string to compare with @s1
1965 * @n: the maximum number of characters to compare
1966 *
1967 * A case-insensitive string comparison, corresponding to the standard
1968 * strncasecmp() function on platforms which support it. It is similar
1969 * to g_strcasecmp() except it only compares the first @n characters of
1970 * the strings.
1971 *
1972 * Returns: 0 if the strings match, a negative value if @s1 < @s2,
1973 * or a positive value if @s1 > @s2.
1974 *
1975 * Deprecated:2.2: The problem with g_strncasecmp() is that it does
1976 * the comparison by calling toupper()/tolower(). These functions
1977 * are locale-specific and operate on single bytes. However, it is
1978 * impossible to handle things correctly from an internationalization
1979 * standpoint by operating on bytes, since characters may be multibyte.
1980 * Thus g_strncasecmp() is broken if your string is guaranteed to be
1981 * ASCII, since it is locale-sensitive, and it's broken if your string
1982 * is localized, since it doesn't work on many encodings at all,
1983 * including UTF-8, EUC-JP, etc.
1984 *
1985 * There are therefore two replacement techniques: g_ascii_strncasecmp(),
1986 * which only works on ASCII and is not locale-sensitive, and
1987 * g_utf8_casefold() followed by strcmp() on the resulting strings,
1988 * which is good for case-insensitive sorting of UTF-8.
1989 */
1990 gint
g_strncasecmp(const gchar * s1,const gchar * s2,guint n)1991 g_strncasecmp (const gchar *s1,
1992 const gchar *s2,
1993 guint n)
1994 {
1995 #ifdef HAVE_STRNCASECMP
1996 return strncasecmp (s1, s2, n);
1997 #else
1998 gint c1, c2;
1999
2000 g_return_val_if_fail (s1 != NULL, 0);
2001 g_return_val_if_fail (s2 != NULL, 0);
2002
2003 while (n && *s1 && *s2)
2004 {
2005 n -= 1;
2006 /* According to A. Cox, some platforms have islower's that
2007 * don't work right on non-uppercase
2008 */
2009 c1 = isupper ((guchar)*s1) ? tolower ((guchar)*s1) : *s1;
2010 c2 = isupper ((guchar)*s2) ? tolower ((guchar)*s2) : *s2;
2011 if (c1 != c2)
2012 return (c1 - c2);
2013 s1++; s2++;
2014 }
2015
2016 if (n)
2017 return (((gint) (guchar) *s1) - ((gint) (guchar) *s2));
2018 else
2019 return 0;
2020 #endif
2021 }
2022
2023 /**
2024 * g_strdelimit:
2025 * @string: the string to convert
2026 * @delimiters: (nullable): a string containing the current delimiters,
2027 * or %NULL to use the standard delimiters defined in #G_STR_DELIMITERS
2028 * @new_delimiter: the new delimiter character
2029 *
2030 * Converts any delimiter characters in @string to @new_delimiter.
2031 * Any characters in @string which are found in @delimiters are
2032 * changed to the @new_delimiter character. Modifies @string in place,
2033 * and returns @string itself, not a copy. The return value is to
2034 * allow nesting such as
2035 * |[<!-- language="C" -->
2036 * g_ascii_strup (g_strdelimit (str, "abc", '?'))
2037 * ]|
2038 *
2039 * In order to modify a copy, you may use `g_strdup()`:
2040 * |[<!-- language="C" -->
2041 * reformatted = g_strdelimit (g_strdup (const_str), "abc", '?');
2042 * ...
2043 * g_free (reformatted);
2044 * ]|
2045 *
2046 * Returns: @string
2047 */
2048 gchar *
g_strdelimit(gchar * string,const gchar * delimiters,gchar new_delim)2049 g_strdelimit (gchar *string,
2050 const gchar *delimiters,
2051 gchar new_delim)
2052 {
2053 gchar *c;
2054
2055 g_return_val_if_fail (string != NULL, NULL);
2056
2057 if (!delimiters)
2058 delimiters = G_STR_DELIMITERS;
2059
2060 for (c = string; *c; c++)
2061 {
2062 if (strchr (delimiters, *c))
2063 *c = new_delim;
2064 }
2065
2066 return string;
2067 }
2068
2069 /**
2070 * g_strcanon:
2071 * @string: a nul-terminated array of bytes
2072 * @valid_chars: bytes permitted in @string
2073 * @substitutor: replacement character for disallowed bytes
2074 *
2075 * For each character in @string, if the character is not in @valid_chars,
2076 * replaces the character with @substitutor. Modifies @string in place,
 * and returns @string itself, not a copy. The return value is to allow
2078 * nesting such as
2079 * |[<!-- language="C" -->
2080 * g_ascii_strup (g_strcanon (str, "abc", '?'))
2081 * ]|
2082 *
2083 * In order to modify a copy, you may use `g_strdup()`:
2084 * |[<!-- language="C" -->
2085 * reformatted = g_strcanon (g_strdup (const_str), "abc", '?');
2086 * ...
2087 * g_free (reformatted);
2088 * ]|
2089 *
2090 * Returns: @string
2091 */
2092 gchar *
g_strcanon(gchar * string,const gchar * valid_chars,gchar substitutor)2093 g_strcanon (gchar *string,
2094 const gchar *valid_chars,
2095 gchar substitutor)
2096 {
2097 gchar *c;
2098
2099 g_return_val_if_fail (string != NULL, NULL);
2100 g_return_val_if_fail (valid_chars != NULL, NULL);
2101
2102 for (c = string; *c; c++)
2103 {
2104 if (!strchr (valid_chars, *c))
2105 *c = substitutor;
2106 }
2107
2108 return string;
2109 }
2110
2111 /**
2112 * g_strcompress:
2113 * @source: a string to compress
2114 *
2115 * Replaces all escaped characters with their one byte equivalent.
2116 *
2117 * This function does the reverse conversion of g_strescape().
2118 *
 * Returns: a newly-allocated copy of @source with all escaped
 *     characters compressed
2121 */
gchar *
g_strcompress (const gchar *source)
{
  /* p walks the input, q writes the output; octal marks where an octal
   * escape started so it can be limited to three digits. */
  const gchar *p = source, *octal;
  gchar *dest;
  gchar *q;

  g_return_val_if_fail (source != NULL, NULL);

  /* Every escape consumes at least two input bytes and emits one, so the
   * result never needs more room than the input itself. */
  dest = g_malloc (strlen (source) + 1);
  q = dest;

  while (*p)
    {
      if (*p == '\\')
        {
          /* Step over the backslash and look at the escape character. */
          p++;
          switch (*p)
            {
            case '\0':
              /* Input ended right after a backslash: warn and stop. */
              g_warning ("g_strcompress: trailing \\");
              goto out;
            case '0':  case '1':  case '2':  case '3':  case '4':
            case '5':  case '6':  case '7':
              /* Octal escape: accumulate up to three octal digits
               * directly into the output byte. */
              *q = 0;
              octal = p;
              while ((p < octal + 3) && (*p >= '0') && (*p <= '7'))
                {
                  *q = (*q * 8) + (*p - '0');
                  p++;
                }
              q++;
              /* The inner loop left p one past the last digit; step back
               * so the p++ at the bottom of the outer loop lands there. */
              p--;
              break;
            case 'b':
              *q++ = '\b';
              break;
            case 'f':
              *q++ = '\f';
              break;
            case 'n':
              *q++ = '\n';
              break;
            case 'r':
              *q++ = '\r';
              break;
            case 't':
              *q++ = '\t';
              break;
            case 'v':
              *q++ = '\v';
              break;
            default:            /* Also handles \" and \\ */
              /* Unknown escapes simply drop the backslash. */
              *q++ = *p;
              break;
            }
        }
      else
        *q++ = *p;
      p++;
    }
out:
  /* Terminate the output (also reached from the trailing-backslash case). */
  *q = 0;

  return dest;
}
2188
2189 /**
2190 * g_strescape:
2191 * @source: a string to escape
2192 * @exceptions: (nullable): a string of characters not to escape in @source
2193 *
2194 * Escapes the special characters '\b', '\f', '\n', '\r', '\t', '\v', '\'
2195 * and '"' in the string @source by inserting a '\' before
2196 * them. Additionally all characters in the range 0x01-0x1F (everything
2197 * below SPACE) and in the range 0x7F-0xFF (all non-ASCII chars) are
2198 * replaced with a '\' followed by their octal representation.
2199 * Characters supplied in @exceptions are not escaped.
2200 *
2201 * g_strcompress() does the reverse conversion.
2202 *
2203 * Returns: a newly-allocated copy of @source with certain
2204 * characters escaped. See above.
2205 */
2206 gchar *
g_strescape(const gchar * source,const gchar * exceptions)2207 g_strescape (const gchar *source,
2208 const gchar *exceptions)
2209 {
2210 const guchar *p;
2211 gchar *dest;
2212 gchar *q;
2213 guchar excmap[256];
2214
2215 g_return_val_if_fail (source != NULL, NULL);
2216
2217 p = (guchar *) source;
2218 /* Each source byte needs maximally four destination chars (\777) */
2219 q = dest = g_malloc (strlen (source) * 4 + 1);
2220
2221 memset (excmap, 0, 256);
2222 if (exceptions)
2223 {
2224 guchar *e = (guchar *) exceptions;
2225
2226 while (*e)
2227 {
2228 excmap[*e] = 1;
2229 e++;
2230 }
2231 }
2232
2233 while (*p)
2234 {
2235 if (excmap[*p])
2236 *q++ = *p;
2237 else
2238 {
2239 switch (*p)
2240 {
2241 case '\b':
2242 *q++ = '\\';
2243 *q++ = 'b';
2244 break;
2245 case '\f':
2246 *q++ = '\\';
2247 *q++ = 'f';
2248 break;
2249 case '\n':
2250 *q++ = '\\';
2251 *q++ = 'n';
2252 break;
2253 case '\r':
2254 *q++ = '\\';
2255 *q++ = 'r';
2256 break;
2257 case '\t':
2258 *q++ = '\\';
2259 *q++ = 't';
2260 break;
2261 case '\v':
2262 *q++ = '\\';
2263 *q++ = 'v';
2264 break;
2265 case '\\':
2266 *q++ = '\\';
2267 *q++ = '\\';
2268 break;
2269 case '"':
2270 *q++ = '\\';
2271 *q++ = '"';
2272 break;
2273 default:
2274 if ((*p < ' ') || (*p >= 0177))
2275 {
2276 *q++ = '\\';
2277 *q++ = '0' + (((*p) >> 6) & 07);
2278 *q++ = '0' + (((*p) >> 3) & 07);
2279 *q++ = '0' + ((*p) & 07);
2280 }
2281 else
2282 *q++ = *p;
2283 break;
2284 }
2285 }
2286 p++;
2287 }
2288 *q = 0;
2289 return dest;
2290 }
2291
2292 /**
2293 * g_strchug:
2294 * @string: a string to remove the leading whitespace from
2295 *
2296 * Removes leading whitespace from a string, by moving the rest
2297 * of the characters forward.
2298 *
2299 * This function doesn't allocate or reallocate any memory;
2300 * it modifies @string in place. Therefore, it cannot be used on
2301 * statically allocated strings.
2302 *
2303 * The pointer to @string is returned to allow the nesting of functions.
2304 *
2305 * Also see g_strchomp() and g_strstrip().
2306 *
2307 * Returns: @string
2308 */
2309 gchar *
g_strchug(gchar * string)2310 g_strchug (gchar *string)
2311 {
2312 guchar *start;
2313
2314 g_return_val_if_fail (string != NULL, NULL);
2315
2316 for (start = (guchar*) string; *start && g_ascii_isspace (*start); start++)
2317 ;
2318
2319 memmove (string, start, strlen ((gchar *) start) + 1);
2320
2321 return string;
2322 }
2323
2324 /**
2325 * g_strchomp:
2326 * @string: a string to remove the trailing whitespace from
2327 *
2328 * Removes trailing whitespace from a string.
2329 *
2330 * This function doesn't allocate or reallocate any memory;
2331 * it modifies @string in place. Therefore, it cannot be used
2332 * on statically allocated strings.
2333 *
2334 * The pointer to @string is returned to allow the nesting of functions.
2335 *
2336 * Also see g_strchug() and g_strstrip().
2337 *
2338 * Returns: @string
2339 */
2340 gchar *
g_strchomp(gchar * string)2341 g_strchomp (gchar *string)
2342 {
2343 gsize len;
2344
2345 g_return_val_if_fail (string != NULL, NULL);
2346
2347 len = strlen (string);
2348 while (len--)
2349 {
2350 if (g_ascii_isspace ((guchar) string[len]))
2351 string[len] = '\0';
2352 else
2353 break;
2354 }
2355
2356 return string;
2357 }
2358
2359 /**
2360 * g_strsplit:
2361 * @string: a string to split
2362 * @delimiter: a string which specifies the places at which to split
2363 * the string. The delimiter is not included in any of the resulting
2364 * strings, unless @max_tokens is reached.
2365 * @max_tokens: the maximum number of pieces to split @string into.
2366 * If this is less than 1, the string is split completely.
2367 *
2368 * Splits a string into a maximum of @max_tokens pieces, using the given
2369 * @delimiter. If @max_tokens is reached, the remainder of @string is
2370 * appended to the last token.
2371 *
2372 * As an example, the result of g_strsplit (":a:bc::d:", ":", -1) is a
2373 * %NULL-terminated vector containing the six strings "", "a", "bc", "", "d"
2374 * and "".
2375 *
2376 * As a special case, the result of splitting the empty string "" is an empty
2377 * vector, not a vector containing a single string. The reason for this
2378 * special case is that being able to represent an empty vector is typically
2379 * more useful than consistent handling of empty elements. If you do need
2380 * to represent empty elements, you'll need to check for the empty string
2381 * before calling g_strsplit().
2382 *
2383 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2384 * g_strfreev() to free it.
2385 */
2386 gchar**
g_strsplit(const gchar * string,const gchar * delimiter,gint max_tokens)2387 g_strsplit (const gchar *string,
2388 const gchar *delimiter,
2389 gint max_tokens)
2390 {
2391 char *s;
2392 const gchar *remainder;
2393 GPtrArray *string_list;
2394
2395 g_return_val_if_fail (string != NULL, NULL);
2396 g_return_val_if_fail (delimiter != NULL, NULL);
2397 g_return_val_if_fail (delimiter[0] != '\0', NULL);
2398
2399 if (max_tokens < 1)
2400 max_tokens = G_MAXINT;
2401
2402 string_list = g_ptr_array_new ();
2403 remainder = string;
2404 s = strstr (remainder, delimiter);
2405 if (s)
2406 {
2407 gsize delimiter_len = strlen (delimiter);
2408
2409 while (--max_tokens && s)
2410 {
2411 gsize len;
2412
2413 len = s - remainder;
2414 g_ptr_array_add (string_list, g_strndup (remainder, len));
2415 remainder = s + delimiter_len;
2416 s = strstr (remainder, delimiter);
2417 }
2418 }
2419 if (*string)
2420 g_ptr_array_add (string_list, g_strdup (remainder));
2421
2422 g_ptr_array_add (string_list, NULL);
2423
2424 return (char **) g_ptr_array_free (string_list, FALSE);
2425 }
2426
2427 /**
2428 * g_strsplit_set:
2429 * @string: The string to be tokenized
2430 * @delimiters: A nul-terminated string containing bytes that are used
2431 * to split the string (it can accept an empty string, which will result
2432 * in no string splitting).
2433 * @max_tokens: The maximum number of tokens to split @string into.
2434 * If this is less than 1, the string is split completely
2435 *
2436 * Splits @string into a number of tokens not containing any of the characters
 * in @delimiters. A token is the (possibly empty) longest string that does not
2438 * contain any of the characters in @delimiters. If @max_tokens is reached, the
2439 * remainder is appended to the last token.
2440 *
2441 * For example the result of g_strsplit_set ("abc:def/ghi", ":/", -1) is a
2442 * %NULL-terminated vector containing the three strings "abc", "def",
2443 * and "ghi".
2444 *
2445 * The result of g_strsplit_set (":def/ghi:", ":/", -1) is a %NULL-terminated
2446 * vector containing the four strings "", "def", "ghi", and "".
2447 *
2448 * As a special case, the result of splitting the empty string "" is an empty
2449 * vector, not a vector containing a single string. The reason for this
2450 * special case is that being able to represent an empty vector is typically
2451 * more useful than consistent handling of empty elements. If you do need
2452 * to represent empty elements, you'll need to check for the empty string
2453 * before calling g_strsplit_set().
2454 *
2455 * Note that this function works on bytes not characters, so it can't be used
2456 * to delimit UTF-8 strings for anything but ASCII characters.
2457 *
2458 * Returns: a newly-allocated %NULL-terminated array of strings. Use
2459 * g_strfreev() to free it.
2460 *
2461 * Since: 2.4
2462 **/
2463 gchar **
g_strsplit_set(const gchar * string,const gchar * delimiters,gint max_tokens)2464 g_strsplit_set (const gchar *string,
2465 const gchar *delimiters,
2466 gint max_tokens)
2467 {
2468 guint8 delim_table[256]; /* 1 = index is a separator; 0 otherwise */
2469 GSList *tokens, *list;
2470 gint n_tokens;
2471 const gchar *s;
2472 const gchar *current;
2473 gchar *token;
2474 gchar **result;
2475
2476 g_return_val_if_fail (string != NULL, NULL);
2477 g_return_val_if_fail (delimiters != NULL, NULL);
2478
2479 if (max_tokens < 1)
2480 max_tokens = G_MAXINT;
2481
2482 if (*string == '\0')
2483 {
2484 result = g_new (char *, 1);
2485 result[0] = NULL;
2486 return result;
2487 }
2488
2489 /* Check if each character in @string is a separator, by indexing by the
2490 * character value into the @delim_table, which has value 1 stored at an index
2491 * if that index is a separator. */
2492 memset (delim_table, FALSE, sizeof (delim_table));
2493 for (s = delimiters; *s != '\0'; ++s)
2494 delim_table[*(guchar *)s] = TRUE;
2495
2496 tokens = NULL;
2497 n_tokens = 0;
2498
2499 s = current = string;
2500 while (*s != '\0')
2501 {
2502 if (delim_table[*(guchar *)s] && n_tokens + 1 < max_tokens)
2503 {
2504 token = g_strndup (current, s - current);
2505 tokens = g_slist_prepend (tokens, token);
2506 ++n_tokens;
2507
2508 current = s + 1;
2509 }
2510
2511 ++s;
2512 }
2513
2514 token = g_strndup (current, s - current);
2515 tokens = g_slist_prepend (tokens, token);
2516 ++n_tokens;
2517
2518 result = g_new (gchar *, n_tokens + 1);
2519
2520 result[n_tokens] = NULL;
2521 for (list = tokens; list != NULL; list = list->next)
2522 result[--n_tokens] = list->data;
2523
2524 g_slist_free (tokens);
2525
2526 return result;
2527 }
2528
2529 /**
2530 * GStrv:
2531 *
2532 * A typedef alias for gchar**. This is mostly useful when used together with
2533 * g_auto().
2534 */
2535
2536 /**
2537 * g_strfreev:
2538 * @str_array: (nullable): a %NULL-terminated array of strings to free
2539 *
2540 * Frees a %NULL-terminated array of strings, as well as each
2541 * string it contains.
2542 *
2543 * If @str_array is %NULL, this function simply returns.
2544 */
2545 void
g_strfreev(gchar ** str_array)2546 g_strfreev (gchar **str_array)
2547 {
2548 if (str_array)
2549 {
2550 gsize i;
2551
2552 for (i = 0; str_array[i] != NULL; i++)
2553 g_free (str_array[i]);
2554
2555 g_free (str_array);
2556 }
2557 }
2558
2559 /**
2560 * g_strdupv:
2561 * @str_array: (nullable): a %NULL-terminated array of strings
2562 *
 * Copies a %NULL-terminated array of strings. The copy is a deep copy;
2564 * the new array should be freed by first freeing each string, then
2565 * the array itself. g_strfreev() does this for you. If called
2566 * on a %NULL value, g_strdupv() simply returns %NULL.
2567 *
2568 * Returns: (nullable): a new %NULL-terminated array of strings.
2569 */
2570 gchar**
g_strdupv(gchar ** str_array)2571 g_strdupv (gchar **str_array)
2572 {
2573 if (str_array)
2574 {
2575 gsize i;
2576 gchar **retval;
2577
2578 i = 0;
2579 while (str_array[i])
2580 ++i;
2581
2582 retval = g_new (gchar*, i + 1);
2583
2584 i = 0;
2585 while (str_array[i])
2586 {
2587 retval[i] = g_strdup (str_array[i]);
2588 ++i;
2589 }
2590 retval[i] = NULL;
2591
2592 return retval;
2593 }
2594 else
2595 return NULL;
2596 }
2597
2598 /**
2599 * g_strjoinv:
2600 * @separator: (nullable): a string to insert between each of the
2601 * strings, or %NULL
2602 * @str_array: a %NULL-terminated array of strings to join
2603 *
2604 * Joins a number of strings together to form one long string, with the
2605 * optional @separator inserted between each of them. The returned string
2606 * should be freed with g_free().
2607 *
2608 * If @str_array has no items, the return value will be an
2609 * empty string. If @str_array contains a single item, @separator will not
2610 * appear in the resulting string.
2611 *
2612 * Returns: a newly-allocated string containing all of the strings joined
2613 * together, with @separator between them
2614 */
2615 gchar*
g_strjoinv(const gchar * separator,gchar ** str_array)2616 g_strjoinv (const gchar *separator,
2617 gchar **str_array)
2618 {
2619 gchar *string;
2620 gchar *ptr;
2621
2622 g_return_val_if_fail (str_array != NULL, NULL);
2623
2624 if (separator == NULL)
2625 separator = "";
2626
2627 if (*str_array)
2628 {
2629 gsize i;
2630 gsize len;
2631 gsize separator_len;
2632
2633 separator_len = strlen (separator);
2634 /* First part, getting length */
2635 len = 1 + strlen (str_array[0]);
2636 for (i = 1; str_array[i] != NULL; i++)
2637 len += strlen (str_array[i]);
2638 len += separator_len * (i - 1);
2639
2640 /* Second part, building string */
2641 string = g_new (gchar, len);
2642 ptr = g_stpcpy (string, *str_array);
2643 for (i = 1; str_array[i] != NULL; i++)
2644 {
2645 ptr = g_stpcpy (ptr, separator);
2646 ptr = g_stpcpy (ptr, str_array[i]);
2647 }
2648 }
2649 else
2650 string = g_strdup ("");
2651
2652 return string;
2653 }
2654
2655 /**
2656 * g_strjoin:
2657 * @separator: (nullable): a string to insert between each of the
2658 * strings, or %NULL
2659 * @...: a %NULL-terminated list of strings to join
2660 *
2661 * Joins a number of strings together to form one long string, with the
2662 * optional @separator inserted between each of them. The returned string
2663 * should be freed with g_free().
2664 *
2665 * Returns: a newly-allocated string containing all of the strings joined
2666 * together, with @separator between them
2667 */
2668 gchar*
g_strjoin(const gchar * separator,...)2669 g_strjoin (const gchar *separator,
2670 ...)
2671 {
2672 gchar *string, *s;
2673 va_list args;
2674 gsize len;
2675 gsize separator_len;
2676 gchar *ptr;
2677
2678 if (separator == NULL)
2679 separator = "";
2680
2681 separator_len = strlen (separator);
2682
2683 va_start (args, separator);
2684
2685 s = va_arg (args, gchar*);
2686
2687 if (s)
2688 {
2689 /* First part, getting length */
2690 len = 1 + strlen (s);
2691
2692 s = va_arg (args, gchar*);
2693 while (s)
2694 {
2695 len += separator_len + strlen (s);
2696 s = va_arg (args, gchar*);
2697 }
2698 va_end (args);
2699
2700 /* Second part, building string */
2701 string = g_new (gchar, len);
2702
2703 va_start (args, separator);
2704
2705 s = va_arg (args, gchar*);
2706 ptr = g_stpcpy (string, s);
2707
2708 s = va_arg (args, gchar*);
2709 while (s)
2710 {
2711 ptr = g_stpcpy (ptr, separator);
2712 ptr = g_stpcpy (ptr, s);
2713 s = va_arg (args, gchar*);
2714 }
2715 }
2716 else
2717 string = g_strdup ("");
2718
2719 va_end (args);
2720
2721 return string;
2722 }
2723
2724
2725 /**
2726 * g_strstr_len:
2727 * @haystack: a nul-terminated string
2728 * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2729 * can be used to mean "search the entire string", like `strstr()`.
2730 * @needle: the string to search for
2731 *
2732 * Searches the string @haystack for the first occurrence
2733 * of the string @needle, limiting the length of the search
2734 * to @haystack_len.
2735 *
2736 * Returns: a pointer to the found occurrence, or
2737 * %NULL if not found.
2738 */
2739 gchar *
g_strstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2740 g_strstr_len (const gchar *haystack,
2741 gssize haystack_len,
2742 const gchar *needle)
2743 {
2744 g_return_val_if_fail (haystack != NULL, NULL);
2745 g_return_val_if_fail (needle != NULL, NULL);
2746
2747 if (haystack_len < 0)
2748 return strstr (haystack, needle);
2749 else
2750 {
2751 const gchar *p = haystack;
2752 gsize needle_len = strlen (needle);
2753 gsize haystack_len_unsigned = haystack_len;
2754 const gchar *end;
2755 gsize i;
2756
2757 if (needle_len == 0)
2758 return (gchar *)haystack;
2759
2760 if (haystack_len_unsigned < needle_len)
2761 return NULL;
2762
2763 end = haystack + haystack_len - needle_len;
2764
2765 while (p <= end && *p)
2766 {
2767 for (i = 0; i < needle_len; i++)
2768 if (p[i] != needle[i])
2769 goto next;
2770
2771 return (gchar *)p;
2772
2773 next:
2774 p++;
2775 }
2776
2777 return NULL;
2778 }
2779 }
2780
2781 /**
2782 * g_strrstr:
2783 * @haystack: a nul-terminated string
2784 * @needle: the nul-terminated string to search for
2785 *
2786 * Searches the string @haystack for the last occurrence
2787 * of the string @needle.
2788 *
2789 * Returns: a pointer to the found occurrence, or
2790 * %NULL if not found.
2791 */
2792 gchar *
g_strrstr(const gchar * haystack,const gchar * needle)2793 g_strrstr (const gchar *haystack,
2794 const gchar *needle)
2795 {
2796 gsize i;
2797 gsize needle_len;
2798 gsize haystack_len;
2799 const gchar *p;
2800
2801 g_return_val_if_fail (haystack != NULL, NULL);
2802 g_return_val_if_fail (needle != NULL, NULL);
2803
2804 needle_len = strlen (needle);
2805 haystack_len = strlen (haystack);
2806
2807 if (needle_len == 0)
2808 return (gchar *)haystack;
2809
2810 if (haystack_len < needle_len)
2811 return NULL;
2812
2813 p = haystack + haystack_len - needle_len;
2814
2815 while (p >= haystack)
2816 {
2817 for (i = 0; i < needle_len; i++)
2818 if (p[i] != needle[i])
2819 goto next;
2820
2821 return (gchar *)p;
2822
2823 next:
2824 p--;
2825 }
2826
2827 return NULL;
2828 }
2829
2830 /**
2831 * g_strrstr_len:
2832 * @haystack: a nul-terminated string
2833 * @haystack_len: the maximum length of @haystack in bytes. A length of -1
2834 * can be used to mean "search the entire string", like g_strrstr().
2835 * @needle: the nul-terminated string to search for
2836 *
2837 * Searches the string @haystack for the last occurrence
2838 * of the string @needle, limiting the length of the search
2839 * to @haystack_len.
2840 *
2841 * Returns: a pointer to the found occurrence, or
2842 * %NULL if not found.
2843 */
2844 gchar *
g_strrstr_len(const gchar * haystack,gssize haystack_len,const gchar * needle)2845 g_strrstr_len (const gchar *haystack,
2846 gssize haystack_len,
2847 const gchar *needle)
2848 {
2849 g_return_val_if_fail (haystack != NULL, NULL);
2850 g_return_val_if_fail (needle != NULL, NULL);
2851
2852 if (haystack_len < 0)
2853 return g_strrstr (haystack, needle);
2854 else
2855 {
2856 gsize needle_len = strlen (needle);
2857 const gchar *haystack_max = haystack + haystack_len;
2858 const gchar *p = haystack;
2859 gsize i;
2860
2861 while (p < haystack_max && *p)
2862 p++;
2863
2864 if (p < haystack + needle_len)
2865 return NULL;
2866
2867 p -= needle_len;
2868
2869 while (p >= haystack)
2870 {
2871 for (i = 0; i < needle_len; i++)
2872 if (p[i] != needle[i])
2873 goto next;
2874
2875 return (gchar *)p;
2876
2877 next:
2878 p--;
2879 }
2880
2881 return NULL;
2882 }
2883 }
2884
2885
2886 /**
2887 * g_str_has_suffix:
2888 * @str: a nul-terminated string
2889 * @suffix: the nul-terminated suffix to look for
2890 *
2891 * Looks whether the string @str ends with @suffix.
2892 *
 * Returns: %TRUE if @str ends with @suffix, %FALSE otherwise.
2894 *
2895 * Since: 2.2
2896 */
2897 gboolean
g_str_has_suffix(const gchar * str,const gchar * suffix)2898 g_str_has_suffix (const gchar *str,
2899 const gchar *suffix)
2900 {
2901 gsize str_len;
2902 gsize suffix_len;
2903
2904 g_return_val_if_fail (str != NULL, FALSE);
2905 g_return_val_if_fail (suffix != NULL, FALSE);
2906
2907 str_len = strlen (str);
2908 suffix_len = strlen (suffix);
2909
2910 if (str_len < suffix_len)
2911 return FALSE;
2912
2913 return strcmp (str + str_len - suffix_len, suffix) == 0;
2914 }
2915
2916 /**
2917 * g_str_has_prefix:
2918 * @str: a nul-terminated string
2919 * @prefix: the nul-terminated prefix to look for
2920 *
2921 * Looks whether the string @str begins with @prefix.
2922 *
2923 * Returns: %TRUE if @str begins with @prefix, %FALSE otherwise.
2924 *
2925 * Since: 2.2
2926 */
2927 gboolean
g_str_has_prefix(const gchar * str,const gchar * prefix)2928 g_str_has_prefix (const gchar *str,
2929 const gchar *prefix)
2930 {
2931 g_return_val_if_fail (str != NULL, FALSE);
2932 g_return_val_if_fail (prefix != NULL, FALSE);
2933
2934 return strncmp (str, prefix, strlen (prefix)) == 0;
2935 }
2936
2937 /**
2938 * g_strv_length:
2939 * @str_array: a %NULL-terminated array of strings
2940 *
2941 * Returns the length of the given %NULL-terminated
2942 * string array @str_array. @str_array must not be %NULL.
2943 *
2944 * Returns: length of @str_array.
2945 *
2946 * Since: 2.6
2947 */
2948 guint
g_strv_length(gchar ** str_array)2949 g_strv_length (gchar **str_array)
2950 {
2951 guint i = 0;
2952
2953 g_return_val_if_fail (str_array != NULL, 0);
2954
2955 while (str_array[i])
2956 ++i;
2957
2958 return i;
2959 }
2960
2961 static void
index_add_folded(GPtrArray * array,const gchar * start,const gchar * end)2962 index_add_folded (GPtrArray *array,
2963 const gchar *start,
2964 const gchar *end)
2965 {
2966 gchar *normal;
2967
2968 normal = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL_COMPOSE);
2969
2970 /* TODO: Invent time machine. Converse with Mustafa Ataturk... */
2971 if (strstr (normal, "ı") || strstr (normal, "İ"))
2972 {
2973 gchar *s = normal;
2974 GString *tmp;
2975
2976 tmp = g_string_new (NULL);
2977
2978 while (*s)
2979 {
2980 gchar *i, *I, *e;
2981
2982 i = strstr (s, "ı");
2983 I = strstr (s, "İ");
2984
2985 if (!i && !I)
2986 break;
2987 else if (i && !I)
2988 e = i;
2989 else if (I && !i)
2990 e = I;
2991 else if (i < I)
2992 e = i;
2993 else
2994 e = I;
2995
2996 g_string_append_len (tmp, s, e - s);
2997 g_string_append_c (tmp, 'i');
2998 s = g_utf8_next_char (e);
2999 }
3000
3001 g_string_append (tmp, s);
3002 g_free (normal);
3003 normal = g_string_free (tmp, FALSE);
3004 }
3005
3006 g_ptr_array_add (array, g_utf8_casefold (normal, -1));
3007 g_free (normal);
3008 }
3009
3010 static gchar **
split_words(const gchar * value)3011 split_words (const gchar *value)
3012 {
3013 const gchar *start = NULL;
3014 GPtrArray *result;
3015 const gchar *s;
3016
3017 result = g_ptr_array_new ();
3018
3019 for (s = value; *s; s = g_utf8_next_char (s))
3020 {
3021 gunichar c = g_utf8_get_char (s);
3022
3023 if (start == NULL)
3024 {
3025 if (g_unichar_isalnum (c) || g_unichar_ismark (c))
3026 start = s;
3027 }
3028 else
3029 {
3030 if (!g_unichar_isalnum (c) && !g_unichar_ismark (c))
3031 {
3032 index_add_folded (result, start, s);
3033 start = NULL;
3034 }
3035 }
3036 }
3037
3038 if (start)
3039 index_add_folded (result, start, s);
3040
3041 g_ptr_array_add (result, NULL);
3042
3043 return (gchar **) g_ptr_array_free (result, FALSE);
3044 }
3045
3046 /**
3047 * g_str_tokenize_and_fold:
3048 * @string: a string
3049 * @translit_locale: (nullable): the language code (like 'de' or
3050 * 'en_GB') from which @string originates
3051 * @ascii_alternates: (out) (transfer full) (array zero-terminated=1): a
3052 * return location for ASCII alternates
3053 *
3054 * Tokenises @string and performs folding on each token.
3055 *
3056 * A token is a non-empty sequence of alphanumeric characters in the
3057 * source string, separated by non-alphanumeric characters. An
3058 * "alphanumeric" character for this purpose is one that matches
3059 * g_unichar_isalnum() or g_unichar_ismark().
3060 *
3061 * Each token is then (Unicode) normalised and case-folded. If
3062 * @ascii_alternates is non-%NULL and some of the returned tokens
3063 * contain non-ASCII characters, ASCII alternatives will be generated.
3064 *
3065 * The number of ASCII alternatives that are generated and the method
3066 * for doing so is unspecified, but @translit_locale (if specified) may
3067 * improve the transliteration if the language of the source string is
3068 * known.
3069 *
3070 * Returns: (transfer full) (array zero-terminated=1): the folded tokens
3071 *
3072 * Since: 2.40
3073 **/
3074 gchar **
g_str_tokenize_and_fold(const gchar * string,const gchar * translit_locale,gchar *** ascii_alternates)3075 g_str_tokenize_and_fold (const gchar *string,
3076 const gchar *translit_locale,
3077 gchar ***ascii_alternates)
3078 {
3079 gchar **result;
3080
3081 g_return_val_if_fail (string != NULL, NULL);
3082
3083 if (ascii_alternates && g_str_is_ascii (string))
3084 {
3085 *ascii_alternates = g_new0 (gchar *, 0 + 1);
3086 ascii_alternates = NULL;
3087 }
3088
3089 result = split_words (string);
3090
3091 if (ascii_alternates)
3092 {
3093 gint i, j, n;
3094
3095 n = g_strv_length (result);
3096 *ascii_alternates = g_new (gchar *, n + 1);
3097 j = 0;
3098
3099 for (i = 0; i < n; i++)
3100 {
3101 if (!g_str_is_ascii (result[i]))
3102 {
3103 gchar *composed;
3104 gchar *ascii;
3105 gint k;
3106
3107 composed = g_utf8_normalize (result[i], -1, G_NORMALIZE_ALL_COMPOSE);
3108
3109 ascii = g_str_to_ascii (composed, translit_locale);
3110
3111 /* Only accept strings that are now entirely alnums */
3112 for (k = 0; ascii[k]; k++)
3113 if (!g_ascii_isalnum (ascii[k]))
3114 break;
3115
3116 if (ascii[k] == '\0')
3117 /* Made it to the end... */
3118 (*ascii_alternates)[j++] = ascii;
3119 else
3120 g_free (ascii);
3121
3122 g_free (composed);
3123 }
3124 }
3125
3126 (*ascii_alternates)[j] = NULL;
3127 }
3128
3129 return result;
3130 }
3131
3132 /**
3133 * g_str_match_string:
3134 * @search_term: the search term from the user
3135 * @potential_hit: the text that may be a hit
3136 * @accept_alternates: %TRUE to accept ASCII alternates
3137 *
3138 * Checks if a search conducted for @search_term should match
3139 * @potential_hit.
3140 *
3141 * This function calls g_str_tokenize_and_fold() on both
3142 * @search_term and @potential_hit. ASCII alternates are never taken
3143 * for @search_term but will be taken for @potential_hit according to
3144 * the value of @accept_alternates.
3145 *
3146 * A hit occurs when each folded token in @search_term is a prefix of a
3147 * folded token from @potential_hit.
3148 *
3149 * Depending on how you're performing the search, it will typically be
3150 * faster to call g_str_tokenize_and_fold() on each string in
3151 * your corpus and build an index on the returned folded tokens, then
3152 * call g_str_tokenize_and_fold() on the search term and
3153 * perform lookups into that index.
3154 *
3155 * As some examples, searching for ‘fred’ would match the potential hit
3156 * ‘Smith, Fred’ and also ‘Frédéric’. Searching for ‘Fréd’ would match
3157 * ‘Frédéric’ but not ‘Frederic’ (due to the one-directional nature of
3158 * accent matching). Searching ‘fo’ would match ‘Foo’ and ‘Bar Foo
3159 * Baz’, but not ‘SFO’ (because no word has ‘fo’ as a prefix).
3160 *
3161 * Returns: %TRUE if @potential_hit is a hit
3162 *
3163 * Since: 2.40
3164 **/
3165 gboolean
g_str_match_string(const gchar * search_term,const gchar * potential_hit,gboolean accept_alternates)3166 g_str_match_string (const gchar *search_term,
3167 const gchar *potential_hit,
3168 gboolean accept_alternates)
3169 {
3170 gchar **alternates = NULL;
3171 gchar **term_tokens;
3172 gchar **hit_tokens;
3173 gboolean matched;
3174 gint i, j;
3175
3176 g_return_val_if_fail (search_term != NULL, FALSE);
3177 g_return_val_if_fail (potential_hit != NULL, FALSE);
3178
3179 term_tokens = g_str_tokenize_and_fold (search_term, NULL, NULL);
3180 hit_tokens = g_str_tokenize_and_fold (potential_hit, NULL, accept_alternates ? &alternates : NULL);
3181
3182 matched = TRUE;
3183
3184 for (i = 0; term_tokens[i]; i++)
3185 {
3186 for (j = 0; hit_tokens[j]; j++)
3187 if (g_str_has_prefix (hit_tokens[j], term_tokens[i]))
3188 goto one_matched;
3189
3190 if (accept_alternates)
3191 for (j = 0; alternates[j]; j++)
3192 if (g_str_has_prefix (alternates[j], term_tokens[i]))
3193 goto one_matched;
3194
3195 matched = FALSE;
3196 break;
3197
3198 one_matched:
3199 continue;
3200 }
3201
3202 g_strfreev (term_tokens);
3203 g_strfreev (hit_tokens);
3204 g_strfreev (alternates);
3205
3206 return matched;
3207 }
3208
3209 /**
3210 * g_strv_contains:
3211 * @strv: a %NULL-terminated array of strings
3212 * @str: a string
3213 *
3214 * Checks if @strv contains @str. @strv must not be %NULL.
3215 *
3216 * Returns: %TRUE if @str is an element of @strv, according to g_str_equal().
3217 *
3218 * Since: 2.44
3219 */
3220 gboolean
g_strv_contains(const gchar * const * strv,const gchar * str)3221 g_strv_contains (const gchar * const *strv,
3222 const gchar *str)
3223 {
3224 g_return_val_if_fail (strv != NULL, FALSE);
3225 g_return_val_if_fail (str != NULL, FALSE);
3226
3227 for (; *strv != NULL; strv++)
3228 {
3229 if (g_str_equal (str, *strv))
3230 return TRUE;
3231 }
3232
3233 return FALSE;
3234 }
3235
3236 /**
3237 * g_strv_equal:
3238 * @strv1: a %NULL-terminated array of strings
3239 * @strv2: another %NULL-terminated array of strings
3240 *
3241 * Checks if @strv1 and @strv2 contain exactly the same elements in exactly the
3242 * same order. Elements are compared using g_str_equal(). To match independently
3243 * of order, sort the arrays first (using g_qsort_with_data() or similar).
3244 *
 * Two empty arrays are considered equal. Neither @strv1 nor @strv2 may be
3246 * %NULL.
3247 *
3248 * Returns: %TRUE if @strv1 and @strv2 are equal
3249 * Since: 2.60
3250 */
3251 gboolean
g_strv_equal(const gchar * const * strv1,const gchar * const * strv2)3252 g_strv_equal (const gchar * const *strv1,
3253 const gchar * const *strv2)
3254 {
3255 g_return_val_if_fail (strv1 != NULL, FALSE);
3256 g_return_val_if_fail (strv2 != NULL, FALSE);
3257
3258 if (strv1 == strv2)
3259 return TRUE;
3260
3261 for (; *strv1 != NULL && *strv2 != NULL; strv1++, strv2++)
3262 {
3263 if (!g_str_equal (*strv1, *strv2))
3264 return FALSE;
3265 }
3266
3267 return (*strv1 == NULL && *strv2 == NULL);
3268 }
3269
3270 static gboolean
str_has_sign(const gchar * str)3271 str_has_sign (const gchar *str)
3272 {
3273 return str[0] == '-' || str[0] == '+';
3274 }
3275
3276 static gboolean
str_has_hex_prefix(const gchar * str)3277 str_has_hex_prefix (const gchar *str)
3278 {
3279 return str[0] == '0' && g_ascii_tolower (str[1]) == 'x';
3280 }
3281
3282 /**
3283 * g_ascii_string_to_signed:
3284 * @str: a string
3285 * @base: base of a parsed number
3286 * @min: a lower bound (inclusive)
3287 * @max: an upper bound (inclusive)
3288 * @out_num: (out) (optional): a return location for a number
3289 * @error: a return location for #GError
3290 *
3291 * A convenience function for converting a string to a signed number.
3292 *
3293 * This function assumes that @str contains only a number of the given
3294 * @base that is within inclusive bounds limited by @min and @max. If
3295 * this is true, then the converted number is stored in @out_num. An
3296 * empty string is not a valid input. A string with leading or
3297 * trailing whitespace is also an invalid input.
3298 *
3299 * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3300 * not be prefixed with "0x" or "0X". Such a problem does not exist
3301 * for octal numbers, since they were usually prefixed with a zero
3302 * which does not change the value of the parsed number.
3303 *
3304 * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3305 * domain. If the input is invalid, the error code will be
3306 * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3307 * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3308 *
3309 * See g_ascii_strtoll() if you have more complex needs such as
3310 * parsing a string which starts with a number, but then has other
3311 * characters.
3312 *
3313 * Returns: %TRUE if @str was a number, otherwise %FALSE.
3314 *
3315 * Since: 2.54
3316 */
3317 gboolean
g_ascii_string_to_signed(const gchar * str,guint base,gint64 min,gint64 max,gint64 * out_num,GError ** error)3318 g_ascii_string_to_signed (const gchar *str,
3319 guint base,
3320 gint64 min,
3321 gint64 max,
3322 gint64 *out_num,
3323 GError **error)
3324 {
3325 gint64 number;
3326 const gchar *end_ptr = NULL;
3327 gint saved_errno = 0;
3328
3329 g_return_val_if_fail (str != NULL, FALSE);
3330 g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3331 g_return_val_if_fail (min <= max, FALSE);
3332 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3333
3334 if (str[0] == '\0')
3335 {
3336 g_set_error_literal (error,
3337 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3338 _("Empty string is not a number"));
3339 return FALSE;
3340 }
3341
3342 errno = 0;
3343 number = g_ascii_strtoll (str, (gchar **)&end_ptr, base);
3344 saved_errno = errno;
3345
3346 if (/* We do not allow leading whitespace, but g_ascii_strtoll
3347 * accepts it and just skips it, so we need to check for it
3348 * ourselves.
3349 */
3350 g_ascii_isspace (str[0]) ||
3351 /* We don't support hexadecimal numbers prefixed with 0x or
3352 * 0X.
3353 */
3354 (base == 16 &&
3355 (str_has_sign (str) ? str_has_hex_prefix (str + 1) : str_has_hex_prefix (str))) ||
3356 (saved_errno != 0 && saved_errno != ERANGE) ||
3357 end_ptr == NULL ||
3358 *end_ptr != '\0')
3359 {
3360 g_set_error (error,
3361 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3362 _("“%s” is not a signed number"), str);
3363 return FALSE;
3364 }
3365 if (saved_errno == ERANGE || number < min || number > max)
3366 {
3367 gchar *min_str = g_strdup_printf ("%" G_GINT64_FORMAT, min);
3368 gchar *max_str = g_strdup_printf ("%" G_GINT64_FORMAT, max);
3369
3370 g_set_error (error,
3371 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3372 _("Number “%s” is out of bounds [%s, %s]"),
3373 str, min_str, max_str);
3374 g_free (min_str);
3375 g_free (max_str);
3376 return FALSE;
3377 }
3378 if (out_num != NULL)
3379 *out_num = number;
3380 return TRUE;
3381 }
3382
3383 /**
3384 * g_ascii_string_to_unsigned:
3385 * @str: a string
3386 * @base: base of a parsed number
3387 * @min: a lower bound (inclusive)
3388 * @max: an upper bound (inclusive)
3389 * @out_num: (out) (optional): a return location for a number
3390 * @error: a return location for #GError
3391 *
3392 * A convenience function for converting a string to an unsigned number.
3393 *
3394 * This function assumes that @str contains only a number of the given
3395 * @base that is within inclusive bounds limited by @min and @max. If
3396 * this is true, then the converted number is stored in @out_num. An
3397 * empty string is not a valid input. A string with leading or
3398 * trailing whitespace is also an invalid input. A string with a leading sign
3399 * (`-` or `+`) is not a valid input for the unsigned parser.
3400 *
3401 * @base can be between 2 and 36 inclusive. Hexadecimal numbers must
3402 * not be prefixed with "0x" or "0X". Such a problem does not exist
3403 * for octal numbers, since they were usually prefixed with a zero
3404 * which does not change the value of the parsed number.
3405 *
3406 * Parsing failures result in an error with the %G_NUMBER_PARSER_ERROR
3407 * domain. If the input is invalid, the error code will be
3408 * %G_NUMBER_PARSER_ERROR_INVALID. If the parsed number is out of
3409 * bounds - %G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS.
3410 *
3411 * See g_ascii_strtoull() if you have more complex needs such as
3412 * parsing a string which starts with a number, but then has other
3413 * characters.
3414 *
3415 * Returns: %TRUE if @str was a number, otherwise %FALSE.
3416 *
3417 * Since: 2.54
3418 */
3419 gboolean
g_ascii_string_to_unsigned(const gchar * str,guint base,guint64 min,guint64 max,guint64 * out_num,GError ** error)3420 g_ascii_string_to_unsigned (const gchar *str,
3421 guint base,
3422 guint64 min,
3423 guint64 max,
3424 guint64 *out_num,
3425 GError **error)
3426 {
3427 guint64 number;
3428 const gchar *end_ptr = NULL;
3429 gint saved_errno = 0;
3430
3431 g_return_val_if_fail (str != NULL, FALSE);
3432 g_return_val_if_fail (base >= 2 && base <= 36, FALSE);
3433 g_return_val_if_fail (min <= max, FALSE);
3434 g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
3435
3436 if (str[0] == '\0')
3437 {
3438 g_set_error_literal (error,
3439 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3440 _("Empty string is not a number"));
3441 return FALSE;
3442 }
3443
3444 errno = 0;
3445 number = g_ascii_strtoull (str, (gchar **)&end_ptr, base);
3446 saved_errno = errno;
3447
3448 if (/* We do not allow leading whitespace, but g_ascii_strtoull
3449 * accepts it and just skips it, so we need to check for it
3450 * ourselves.
3451 */
3452 g_ascii_isspace (str[0]) ||
3453 /* Unsigned number should have no sign.
3454 */
3455 str_has_sign (str) ||
3456 /* We don't support hexadecimal numbers prefixed with 0x or
3457 * 0X.
3458 */
3459 (base == 16 && str_has_hex_prefix (str)) ||
3460 (saved_errno != 0 && saved_errno != ERANGE) ||
3461 end_ptr == NULL ||
3462 *end_ptr != '\0')
3463 {
3464 g_set_error (error,
3465 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_INVALID,
3466 _("“%s” is not an unsigned number"), str);
3467 return FALSE;
3468 }
3469 if (saved_errno == ERANGE || number < min || number > max)
3470 {
3471 gchar *min_str = g_strdup_printf ("%" G_GUINT64_FORMAT, min);
3472 gchar *max_str = g_strdup_printf ("%" G_GUINT64_FORMAT, max);
3473
3474 g_set_error (error,
3475 G_NUMBER_PARSER_ERROR, G_NUMBER_PARSER_ERROR_OUT_OF_BOUNDS,
3476 _("Number “%s” is out of bounds [%s, %s]"),
3477 str, min_str, max_str);
3478 g_free (min_str);
3479 g_free (max_str);
3480 return FALSE;
3481 }
3482 if (out_num != NULL)
3483 *out_num = number;
3484 return TRUE;
3485 }
3486
/* Quark definition for the number parser errors, registered under the
 * string "g-number-parser-error-quark" with the function-name prefix
 * g_number_parser_error.
 *
 * NOTE(review): presumably this provides the accessor behind the
 * G_NUMBER_PARSER_ERROR domain used by g_ascii_string_to_signed() and
 * g_ascii_string_to_unsigned() - confirm against the header.
 */
3487 G_DEFINE_QUARK (g-number-parser-error-quark, g_number_parser_error)
3488