1 #undef G_DISABLE_ASSERT 2 #undef G_LOG_DOMAIN 3 4 #include <glib.h> 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <string.h> 8 9 gboolean success = TRUE; 10 11 static char * decode(const gchar * input)12 decode (const gchar *input) 13 { 14 unsigned ch; 15 int offset = 0; 16 GString *result = g_string_new (NULL); 17 18 do 19 { 20 if (sscanf (input + offset, "%x", &ch) != 1) 21 { 22 fprintf (stderr, "Error parsing character string %s\n", input); 23 exit (1); 24 } 25 26 g_string_append_unichar (result, ch); 27 28 while (input[offset] && input[offset] != ' ') 29 offset++; 30 while (input[offset] && input[offset] == ' ') 31 offset++; 32 } 33 while (input[offset]); 34 35 return g_string_free (result, FALSE); 36 } 37 38 const char *names[4] = { 39 "NFD", 40 "NFC", 41 "NFKD", 42 "NFKC" 43 }; 44 45 static char * encode(const gchar * input)46 encode (const gchar *input) 47 { 48 GString *result = g_string_new(NULL); 49 50 const gchar *p = input; 51 while (*p) 52 { 53 gunichar c = g_utf8_get_char (p); 54 g_string_append_printf (result, "%04X ", c); 55 p = g_utf8_next_char(p); 56 } 57 58 return g_string_free (result, FALSE); 59 } 60 61 static void test_form(int line,GNormalizeMode mode,gboolean do_compat,int expected,char ** c,char ** raw)62 test_form (int line, 63 GNormalizeMode mode, 64 gboolean do_compat, 65 int expected, 66 char **c, 67 char **raw) 68 { 69 int i; 70 71 gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC || 72 mode == G_NORMALIZE_NFKD); 73 74 if (mode_is_compat || !do_compat) 75 { 76 for (i = 0; i < 3; i++) 77 { 78 char *result = g_utf8_normalize (c[i], -1, mode); 79 if (strcmp (result, c[expected]) != 0) 80 { 81 char *result_raw = encode(result); 82 fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]); 83 fprintf (stderr, " g_utf8_normalize (%s, %s) != %s but %s\n", 84 raw[i], names[mode], raw[expected], result_raw); 85 g_free (result_raw); 86 success = FALSE; 87 } 88 89 g_free (result); 90 } 91 } 92 if (mode_is_compat || do_compat) 93 { 94 for (i = 3; i < 5; i++) 95 { 96 char *result = g_utf8_normalize (c[i], -1, mode); 97 if (strcmp (result, c[expected]) != 0) 98 { 99 char *result_raw = encode(result); 100 fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]); 101 fprintf (stderr, " g_utf8_normalize (%s, %s) != %s but %s\n", 102 raw[i], names[mode], raw[expected], result_raw); 103 g_free (result_raw); 104 success = FALSE; 105 } 106 107 g_free (result); 108 } 109 } 110 } 111 112 static gboolean process_one(int line,gchar ** columns)113 process_one (int line, gchar **columns) 114 { 115 char *c[5]; 116 int i; 117 gboolean skip = FALSE; 118 119 for (i=0; i < 5; i++) 120 { 121 c[i] = decode(columns[i]); 122 if (!c[i]) 123 skip = TRUE; 124 } 125 126 if (!skip) 127 { 128 test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns); 129 test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns); 130 test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns); 131 test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns); 132 test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns); 133 test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns); 134 } 135 136 for (i=0; i < 5; i++) 137 g_free (c[i]); 138 139 return TRUE; 140 } 141 main(int argc,char ** argv)142 int main (int argc, char **argv) 143 { 144 GIOChannel *in; 145 GError *error = NULL; 146 GString *buffer = g_string_new (NULL); 147 int line_to_do = 0; 148 int line = 1; 149 150 if (argc != 2 && argc != 3) 151 { 152 fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n"); 153 return 1; 154 } 155 156 if (argc == 3) 157 line_to_do = atoi(argv[2]); 158 159 in = g_io_channel_new_file (argv[1], "r", &error); 160 if (!in) 161 { 162 fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message); 163 return 1; 164 } 165 166 while (TRUE) 167 { 168 gsize term_pos; 169 gchar **columns; 170 171 if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL) 172 break; 173 174 if (line_to_do && line != line_to_do) 175 goto next; 176 177 buffer->str[term_pos] = '\0'; 178 179 if (buffer->str[0] == '#') /* Comment */ 180 goto next; 181 if (buffer->str[0] == '@') /* Part */ 182 { 183 fprintf (stderr, "\nProcessing %s\n", buffer->str + 1); 184 goto next; 185 } 186 187 columns = g_strsplit (buffer->str, ";", -1); 188 if (!columns[0]) 189 goto next; 190 191 if (!process_one (line, columns)) 192 return 1; 193 g_strfreev (columns); 194 195 next: 196 g_string_truncate (buffer, 0); 197 line++; 198 } 199 200 if (error) 201 { 202 fprintf (stderr, "Error reading test file, %s\n", error->message); 203 return 1; 204 } 205 206 g_io_channel_unref (in); 207 g_string_free (buffer, TRUE); 208 209 return !success; 210 } 211