• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  #undef G_DISABLE_ASSERT
2  #undef G_LOG_DOMAIN
3  
4  #include <glib.h>
5  #include <stdio.h>
6  #include <stdlib.h>
7  #include <string.h>
8  
9  gboolean success = TRUE;
10  
11  static char *
decode(const gchar * input)12  decode (const gchar *input)
13  {
14    unsigned ch;
15    int offset = 0;
16    GString *result = g_string_new (NULL);
17  
18    do
19      {
20        if (sscanf (input + offset, "%x", &ch) != 1)
21  	{
22  	  fprintf (stderr, "Error parsing character string %s\n", input);
23  	  exit (1);
24  	}
25  
26        g_string_append_unichar (result, ch);
27  
28        while (input[offset] && input[offset] != ' ')
29  	offset++;
30        while (input[offset] && input[offset] == ' ')
31  	offset++;
32      }
33    while (input[offset]);
34  
35    return g_string_free (result, FALSE);
36  }
37  
/* Printable names of the normalization forms; test_form() indexes this
 * table with the GNormalizeMode value it was given. */
const char *names[4] = { "NFD", "NFC", "NFKD", "NFKC" };
44  
45  static char *
encode(const gchar * input)46  encode (const gchar *input)
47  {
48    GString *result = g_string_new(NULL);
49  
50    const gchar *p = input;
51    while (*p)
52      {
53        gunichar c = g_utf8_get_char (p);
54        g_string_append_printf (result, "%04X ", c);
55        p = g_utf8_next_char(p);
56      }
57  
58    return g_string_free (result, FALSE);
59  }
60  
61  static void
test_form(int line,GNormalizeMode mode,gboolean do_compat,int expected,char ** c,char ** raw)62  test_form (int            line,
63  	   GNormalizeMode mode,
64  	   gboolean       do_compat,
65  	   int            expected,
66  	   char         **c,
67  	   char         **raw)
68  {
69    int i;
70  
71    gboolean mode_is_compat = (mode == G_NORMALIZE_NFKC ||
72  			     mode == G_NORMALIZE_NFKD);
73  
74    if (mode_is_compat || !do_compat)
75      {
76        for (i = 0; i < 3; i++)
77  	{
78  	  char *result = g_utf8_normalize (c[i], -1, mode);
79  	  if (strcmp (result, c[expected]) != 0)
80  	    {
81  	      char *result_raw = encode(result);
82  	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i + 1, raw[5]);
83  	      fprintf (stderr, "  g_utf8_normalize (%s, %s) != %s but %s\n",
84  		   raw[i], names[mode], raw[expected], result_raw);
85  	      g_free (result_raw);
86  	      success = FALSE;
87  	    }
88  
89  	  g_free (result);
90  	}
91      }
92    if (mode_is_compat || do_compat)
93      {
94        for (i = 3; i < 5; i++)
95  	{
96  	  char *result = g_utf8_normalize (c[i], -1, mode);
97  	  if (strcmp (result, c[expected]) != 0)
98  	    {
99  	      char *result_raw = encode(result);
100  	      fprintf (stderr, "\nFailure: %d/%d: %s\n", line, i, raw[5]);
101  	      fprintf (stderr, "  g_utf8_normalize (%s, %s) != %s but %s\n",
102  		   raw[i], names[mode], raw[expected], result_raw);
103  	      g_free (result_raw);
104  	      success = FALSE;
105  	    }
106  
107  	  g_free (result);
108  	}
109      }
110  }
111  
112  static gboolean
process_one(int line,gchar ** columns)113  process_one (int line, gchar **columns)
114  {
115    char *c[5];
116    int i;
117    gboolean skip = FALSE;
118  
119    for (i=0; i < 5; i++)
120      {
121        c[i] = decode(columns[i]);
122        if (!c[i])
123  	skip = TRUE;
124      }
125  
126    if (!skip)
127      {
128        test_form (line, G_NORMALIZE_NFD, FALSE, 2, c, columns);
129        test_form (line, G_NORMALIZE_NFD, TRUE, 4, c, columns);
130        test_form (line, G_NORMALIZE_NFC, FALSE, 1, c, columns);
131        test_form (line, G_NORMALIZE_NFC, TRUE, 3, c, columns);
132        test_form (line, G_NORMALIZE_NFKD, TRUE, 4, c, columns);
133        test_form (line, G_NORMALIZE_NFKC, TRUE, 3, c, columns);
134      }
135  
136    for (i=0; i < 5; i++)
137      g_free (c[i]);
138  
139    return TRUE;
140  }
141  
main(int argc,char ** argv)142  int main (int argc, char **argv)
143  {
144    GIOChannel *in;
145    GError *error = NULL;
146    GString *buffer = g_string_new (NULL);
147    int line_to_do = 0;
148    int line = 1;
149  
150    if (argc != 2 && argc != 3)
151      {
152        fprintf (stderr, "Usage: unicode-normalize NormalizationTest.txt LINE\n");
153        return 1;
154      }
155  
156    if (argc == 3)
157      line_to_do = atoi(argv[2]);
158  
159    in = g_io_channel_new_file (argv[1], "r", &error);
160    if (!in)
161      {
162        fprintf (stderr, "Cannot open %s: %s\n", argv[1], error->message);
163        return 1;
164      }
165  
166    while (TRUE)
167      {
168        gsize term_pos;
169        gchar **columns;
170  
171        if (g_io_channel_read_line_string (in, buffer, &term_pos, &error) != G_IO_STATUS_NORMAL)
172  	break;
173  
174        if (line_to_do && line != line_to_do)
175  	goto next;
176  
177        buffer->str[term_pos] = '\0';
178  
179        if (buffer->str[0] == '#') /* Comment */
180  	goto next;
181        if (buffer->str[0] == '@') /* Part */
182  	{
183  	  fprintf (stderr, "\nProcessing %s\n", buffer->str + 1);
184  	  goto next;
185  	}
186  
187        columns = g_strsplit (buffer->str, ";", -1);
188        if (!columns[0])
189  	goto next;
190  
191        if (!process_one (line, columns))
192  	return 1;
193        g_strfreev (columns);
194  
195      next:
196        g_string_truncate (buffer, 0);
197        line++;
198      }
199  
200    if (error)
201      {
202        fprintf (stderr, "Error reading test file, %s\n", error->message);
203        return 1;
204      }
205  
206    g_io_channel_unref (in);
207    g_string_free (buffer, TRUE);
208  
209    return !success;
210  }
211