1 #undef G_DISABLE_ASSERT
2 #undef G_LOG_DOMAIN
3
4 #include <glib.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8
/* Overall verdict of the run: starts TRUE, is set to FALSE by test_form()
 * whenever a normalization result differs from the expected column, and
 * main() returns !success as the process exit status. */
gboolean success = TRUE;
10
11 static char *
decode(const gchar * input)12 decode (const gchar *input)
13 {
14 unsigned ch;
15 int offset = 0;
16 GString *result = g_string_new (NULL);
17
18 do
19 {
20 if (sscanf (input + offset, "%x", &ch) != 1)
21 {
22 fprintf (stderr, "Error parsing character string %s\n", input);
23 exit (1);
24 }
25
26 g_string_append_unichar (result, ch);
27
28 while (input[offset] && input[offset] != ' ')
29 offset++;
30 while (input[offset] && input[offset] == ' ')
31 offset++;
32 }
33 while (input[offset]);
34
35 return g_string_free (result, FALSE);
36 }
37
/* Printable names of the normalization forms.  test_form() indexes this
 * table directly with its GNormalizeMode argument (names[mode]), so the
 * order must follow the numeric values of G_NORMALIZE_NFD, G_NORMALIZE_NFC,
 * G_NORMALIZE_NFKD and G_NORMALIZE_NFKC.
 * NOTE(review): the enum is defined in GLib, not in this file -- confirm the
 * ordering against the GNormalizeMode declaration. */
const char *names[4] = {
  "NFD",
  "NFC",
  "NFKD",
  "NFKC"
};
44
45 static char *
encode(const gchar * input)46 encode (const gchar *input)
47 {
48 GString *result = g_string_new(NULL);
49
50 const gchar *p = input;
51 while (*p)
52 {
53 gunichar c = g_utf8_get_char (p);
54 g_string_append_printf (result, "%04X ", c);
55 p = g_utf8_next_char(p);
56 }
57
58 return g_string_free (result, FALSE);
59 }
60
61 static void
test_form(int line,GNormalizeMode mode,gboolean do_compat,int expected,char ** c,char ** raw)62 test_form (int line,
63 GNormalizeMode mode,
64 gboolean do_compat,
65 int expected,
66 char **c,
67 char **raw)
68 {
69 int i;
70
71 gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC ||
72 mode == G_NORMALIZE_NFKD);
73
74 if (mode_is_compat || !do_compat)
75 {
76 for (i = 0; i < 3; i++)
77 {
78 char *result = g_utf8_normalize (c[i], -1, mode);
79 if (strcmp (result, c[expected]) != 0)
80 {
81 char *result_raw = encode(result);
82 fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
83 fprintf (stderr, " g_utf8_normalize (%s, %s) != %s but %s\n",
84 raw[i], names[mode], raw[expected], result_raw);
85 g_free (result_raw);
86 success = FALSE;
87 }
88
89 g_free (result);
90 }
91 }
92 if (mode_is_compat || do_compat)
93 {
94 for (i = 3; i < 5; i++)
95 {
96 char *result = g_utf8_normalize (c[i], -1, mode);
97 if (strcmp (result, c[expected]) != 0)
98 {
99 char *result_raw = encode(result);
100 fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]);
101 fprintf (stderr, " g_utf8_normalize (%s, %s) != %s but %s\n",
102 raw[i], names[mode], raw[expected], result_raw);
103 g_free (result_raw);
104 success = FALSE;
105 }
106
107 g_free (result);
108 }
109 }
110 }
111
112 static gboolean
process_one(int line,gchar ** columns)113 process_one (int line, gchar **columns)
114 {
115 char *c[5];
116 int i;
117 gboolean skip = FALSE;
118
119 for (i=0; i < 5; i++)
120 {
121 c[i] = decode(columns[i]);
122 if (!c[i])
123 skip = TRUE;
124 }
125
126 if (!skip)
127 {
128 test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns);
129 test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns);
130 test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns);
131 test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns);
132 test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns);
133 test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns);
134 }
135
136 for (i=0; i < 5; i++)
137 g_free (c[i]);
138
139 return TRUE;
140 }
141
main(int argc,char ** argv)142 int main (int argc, char **argv)
143 {
144 GIOChannel *in;
145 GError *error = NULL;
146 GString *buffer = g_string_new (NULL);
147 int line_to_do = 0;
148 int line = 1;
149
150 if (argc != 2 && argc != 3)
151 {
152 fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n");
153 return 1;
154 }
155
156 if (argc == 3)
157 line_to_do = atoi(argv[2]);
158
159 in = g_io_channel_new_file (argv[1], "r", &error);
160 if (!in)
161 {
162 fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
163 return 1;
164 }
165
166 while (TRUE)
167 {
168 gsize term_pos;
169 gchar **columns;
170
171 if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL)
172 break;
173
174 if (line_to_do && line != line_to_do)
175 goto next;
176
177 buffer->str[term_pos] = '\0';
178
179 if (buffer->str[0] == '#') /* Comment */
180 goto next;
181 if (buffer->str[0] == '@') /* Part */
182 {
183 fprintf (stderr, "\nProcessing %s\n", buffer->str + 1);
184 goto next;
185 }
186
187 columns = g_strsplit (buffer->str, ";", -1);
188 if (!columns[0])
189 goto next;
190
191 if (!process_one (line, columns))
192 return 1;
193 g_strfreev (columns);
194
195 next:
196 g_string_truncate (buffer, 0);
197 line++;
198 }
199
200 if (error)
201 {
202 fprintf (stderr, "Error reading test file, %s\n", error->message);
203 return 1;
204 }
205
206 g_io_channel_unref (in);
207 g_string_free (buffer, TRUE);
208
209 return !success;
210 }
211