• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GLIB - Library of useful routines for C programming
2  * Copyright (C) 1995-1997  Peter Mattis, Spencer Kimball and Josh MacDonald
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 /*
19  * Modified by the GLib Team and others 1997-2000.  See the AUTHORS
20  * file for a list of people on the GLib Team.  See the ChangeLog
21  * files for a list of changes.  These files are distributed with
22  * GLib at ftp://ftp.gtk.org/pub/gtk/.
23  */
24 
25 #include <string.h>
26 #include <glib.h>
27 
28 /* Test conversions between offsets and pointers */
29 
/*
 * test_utf8:
 * @d: a NUL-terminated UTF-8 string, supplied as the test data pointer
 *
 * Records the start pointer of every character in @d and then checks, for
 * every ordered pair of characters (i, j), that
 * g_utf8_offset_to_pointer() and g_utf8_pointer_to_offset() agree with the
 * recorded pointers, including negative offsets (j < i).
 */
static void
test_utf8 (gconstpointer d)
{
  const gchar *string = d;
  const gchar **p;
  glong num_chars;   /* g_utf8_strlen() returns a glong */
  glong i, j;

  g_assert_true (g_utf8_validate (string, -1, NULL));

  num_chars = g_utf8_strlen (string, -1);

  /* An empty string has no character pairs to cross-check.  Bail out before
   * p[0] is written: a zero-sized allocation request yields NULL. */
  if (num_chars == 0)
    return;

  /* g_new() guards the element-count multiplication against overflow. */
  p = g_new (const gchar *, num_chars);

  /* p[k] points at the first byte of the k-th character. */
  p[0] = string;
  for (i = 1; i < num_chars; i++)
    p[i] = g_utf8_next_char (p[i - 1]);

  for (i = 0; i < num_chars; i++)
    for (j = 0; j < num_chars; j++)
      {
        g_assert_true (g_utf8_offset_to_pointer (p[i], j - i) == p[j]);
        g_assert_cmpint (g_utf8_pointer_to_offset (p[i], p[j]), ==, j - i);
      }

  g_free (p);
}
56 
57 gchar *longline = "asdasdas dsaf asfd as fdasdf asfd asdf as dfas dfasdf a"
58 "asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdççççççççças ffsd asfd as fdASASASAs As"
59 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd"
60 "asd fasdf asdf asdf asd fasfd as fdaèèèèèèè òòòòòòòòòòòòsfd asdf as fdas ffsd asfd as fdASASASAs D"
61 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfgùùùùùùùùùùùùùù sdfg sdf gsdfg sdfg sd"
62 "asd fasdf asdf asdf asd fasfd as fdasfd asd@@@@@@@f as fdas ffsd asfd as fdASASASAs D "
63 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdf€€€€€€€€€€€€€€€€€€g sdfg sdfg sdf gsdfg sdfg sd"
64 "asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdas ffsd asfd as fdASASASAs D"
65 "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd\n\nlalala\n";
66 
67 static void
test_length(void)68 test_length (void)
69 {
70   g_assert (g_utf8_strlen ("1234", -1) == 4);
71   g_assert (g_utf8_strlen ("1234", 0) == 0);
72   g_assert (g_utf8_strlen ("1234", 1) == 1);
73   g_assert (g_utf8_strlen ("1234", 2) == 2);
74   g_assert (g_utf8_strlen ("1234", 3) == 3);
75   g_assert (g_utf8_strlen ("1234", 4) == 4);
76   g_assert (g_utf8_strlen ("1234", 5) == 4);
77 
78   g_assert (g_utf8_strlen (longline, -1) == 762);
79   g_assert (g_utf8_strlen (longline, strlen (longline)) == 762);
80   g_assert (g_utf8_strlen (longline, 1024) == 762);
81 
82   g_assert (g_utf8_strlen (NULL, 0) == 0);
83 
84   g_assert (g_utf8_strlen ("a\340\250\201c", -1) == 3);
85   g_assert (g_utf8_strlen ("a\340\250\201c", 1) == 1);
86   g_assert (g_utf8_strlen ("a\340\250\201c", 2) == 1);
87   g_assert (g_utf8_strlen ("a\340\250\201c", 3) == 1);
88   g_assert (g_utf8_strlen ("a\340\250\201c", 4) == 2);
89   g_assert (g_utf8_strlen ("a\340\250\201c", 5) == 3);
90 }
91 
92 static void
test_find(void)93 test_find (void)
94 {
95   /* U+0B0B Oriya Letter Vocalic R (\340\254\213)
96    * U+10900 Phoenician Letter Alf (\360\220\244\200)
97    * U+0041 Latin Capital Letter A (\101)
98    * U+1EB6 Latin Capital Letter A With Breve And Dot Below (\341\272\266)
99    */
100 #define TEST_STR "\340\254\213\360\220\244\200\101\341\272\266\0\101"
101   const gsize str_size = sizeof TEST_STR;
102   const gchar *str = TEST_STR;
103   const gchar str_array[] = TEST_STR;
104   const gchar * volatile str_volatile = TEST_STR;
105 #undef TEST_STR
106   gchar *str_copy = g_malloc (str_size);
107   const gchar *p;
108   const gchar *q;
109   memcpy (str_copy, str, str_size);
110 
111 #define TEST_SET(STR)  \
112   G_STMT_START { \
113     p = STR + (str_size - 1); \
114     \
115     q = g_utf8_find_prev_char (STR, p); \
116     g_assert (q == STR + 12); \
117     q = g_utf8_find_prev_char (STR, q); \
118     g_assert (q == STR + 11); \
119     q = g_utf8_find_prev_char (STR, q); \
120     g_assert (q == STR + 8); \
121     q = g_utf8_find_prev_char (STR, q); \
122     g_assert (q == STR + 7); \
123     q = g_utf8_find_prev_char (STR, q); \
124     g_assert (q == STR + 3); \
125     q = g_utf8_find_prev_char (STR, q); \
126     g_assert (q == STR); \
127     q = g_utf8_find_prev_char (STR, q); \
128     g_assert_null (q); \
129     \
130     p = STR + 4; \
131     q = g_utf8_find_prev_char (STR, p); \
132     g_assert (q == STR + 3); \
133     \
134     p = STR + 2; \
135     q = g_utf8_find_prev_char (STR, p); \
136     g_assert (q == STR); \
137     \
138     p = STR + 2; \
139     q = g_utf8_find_next_char (p, NULL); \
140     g_assert (q == STR + 3); \
141     q = g_utf8_find_next_char (q, NULL); \
142     g_assert (q == STR + 7); \
143     \
144     q = g_utf8_find_next_char (p, STR + 6); \
145     g_assert (q == STR + 3); \
146     q = g_utf8_find_next_char (q, STR + 6); \
147     g_assert_null (q); \
148     \
149     q = g_utf8_find_next_char (STR, STR); \
150     g_assert_null (q); \
151     \
152     q = g_utf8_find_next_char (STR + strlen (STR), NULL); \
153     g_assert (q == STR + strlen (STR) + 1); \
154     \
155     /* Check return values when reaching the end of the string, \
156      * with @end set and unset. */ \
157     q = g_utf8_find_next_char (STR + 10, NULL); \
158     g_assert_nonnull (q); \
159     g_assert (*q == '\0'); \
160     \
161     q = g_utf8_find_next_char (STR + 10, STR + 11); \
162     g_assert_null (q); \
163   } G_STMT_END
164 
165   TEST_SET(str_array);
166   TEST_SET(str_copy);
167   TEST_SET(str_volatile);
168   /* Starting with GCC 8 tests on @str with "-O2 -flto" in CFLAGS fail due to
169    * (incorrect?) constant propagation of @str into @g_utf8_find_prev_char. It
170    * doesn't happen if @TEST_STR doesn't contain \0 in the middle but the tests
171    * should cover all corner cases.
172    * For instance, see https://gitlab.gnome.org/GNOME/glib/issues/1917 */
173 
174 #undef TEST_SET
175 
176   g_free (str_copy);
177 }
178 
main(int argc,char * argv[])179 int main (int argc, char *argv[])
180 {
181   g_test_init (&argc, &argv, NULL);
182 
183   g_test_add_data_func ("/utf8/offsets", longline, test_utf8);
184   g_test_add_func ("/utf8/lengths", test_length);
185   g_test_add_func ("/utf8/find", test_find);
186 
187   return g_test_run ();
188 }
189