1 /* Unicode CLDR plural rule parser and converter
2 Copyright (C) 2015, 2018-2020 Free Software Foundation, Inc.
3
4 This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22
23 #include "basename-lgpl.h"
24 #include "cldr-plural-exp.h"
25 #include "closeout.h"
26 #include "c-ctype.h"
27 #include <errno.h>
28 #include <error.h>
29 #include <getopt.h>
30 #include "gettext.h"
31 #include <libxml/tree.h>
32 #include <libxml/parser.h>
33 #include <locale.h>
34 #include "progname.h"
35 #include "propername.h"
36 #include "relocatable.h"
37 #include <stdlib.h>
38 #include <string.h>
39 #include "xalloc.h"
40
41 #define _(s) gettext(s)
42
43
44 static char *
extract_rules(FILE * fp,const char * real_filename,const char * logical_filename,const char * locale)45 extract_rules (FILE *fp,
46 const char *real_filename, const char *logical_filename,
47 const char *locale)
48 {
49 xmlDocPtr doc;
50 xmlNodePtr node, n;
51 size_t locale_length;
52 char *buffer = NULL, *p;
53 size_t bufmax = 0;
54 size_t buflen = 0;
55
56 doc = xmlReadFd (fileno (fp), logical_filename, NULL,
57 XML_PARSE_NONET
58 | XML_PARSE_NOWARNING
59 | XML_PARSE_NOBLANKS);
60 if (doc == NULL)
61 error (EXIT_FAILURE, 0, _("Could not parse file %s as XML"), logical_filename);
62
63 node = xmlDocGetRootElement (doc);
64 if (!node || !xmlStrEqual (node->name, BAD_CAST "supplementalData"))
65 {
66 error_at_line (0, 0,
67 logical_filename,
68 xmlGetLineNo (node),
69 _("The root element must be <%s>"),
70 "supplementalData");
71 goto out;
72 }
73
74 for (n = node->children; n; n = n->next)
75 {
76 if (n->type == XML_ELEMENT_NODE
77 && xmlStrEqual (n->name, BAD_CAST "plurals"))
78 break;
79 }
80 if (!n)
81 {
82 error (0, 0, _("The element <%s> does not contain a <%s> element"),
83 "supplementalData", "plurals");
84 goto out;
85 }
86
87 locale_length = strlen (locale);
88 for (n = n->children; n; n = n->next)
89 {
90 xmlChar *locales;
91 xmlChar *cp;
92 xmlNodePtr n2;
93 bool found = false;
94
95 if (n->type != XML_ELEMENT_NODE
96 || !xmlStrEqual (n->name, BAD_CAST "pluralRules"))
97 continue;
98
99 if (!xmlHasProp (n, BAD_CAST "locales"))
100 {
101 error_at_line (0, 0,
102 logical_filename,
103 xmlGetLineNo (n),
104 _("The element <%s> does not have attribute <%s>"),
105 "pluralRules", "locales");
106 continue;
107 }
108
109 cp = locales = xmlGetProp (n, BAD_CAST "locales");
110 while (*cp != '\0')
111 {
112 while (c_isspace (*cp))
113 cp++;
114 if (xmlStrncmp (cp, BAD_CAST locale, locale_length) == 0
115 && (*(cp + locale_length) == '\0'
116 || c_isspace (*(cp + locale_length))))
117 {
118 found = true;
119 break;
120 }
121 while (*cp && !c_isspace (*cp))
122 cp++;
123 }
124 xmlFree (locales);
125
126 if (!found)
127 continue;
128
129 for (n2 = n->children; n2; n2 = n2->next)
130 {
131 xmlChar *count;
132 xmlChar *content;
133 size_t length;
134
135 if (n2->type != XML_ELEMENT_NODE
136 || !xmlStrEqual (n2->name, BAD_CAST "pluralRule"))
137 continue;
138
139 if (!xmlHasProp (n2, BAD_CAST "count"))
140 {
141 error_at_line (0, 0,
142 logical_filename,
143 xmlGetLineNo (n2),
144 _("The element <%s> does not have attribute <%s>"),
145 "pluralRule", "count");
146 break;
147 }
148
149 count = xmlGetProp (n2, BAD_CAST "count");
150 content = xmlNodeGetContent (n2);
151 length = xmlStrlen (count) + strlen (": ")
152 + xmlStrlen (content) + strlen ("; ");
153
154 if (buflen + length + 1 > bufmax)
155 {
156 bufmax *= 2;
157 if (bufmax < buflen + length + 1)
158 bufmax = buflen + length + 1;
159 buffer = (char *) xrealloc (buffer, bufmax);
160 }
161
162 sprintf (buffer + buflen, "%s: %s; ", count, content);
163 xmlFree (count);
164 xmlFree (content);
165
166 buflen += length;
167 }
168 }
169
170 if (buffer)
171 {
172 /* Scrub the last semicolon, if any. */
173 p = strrchr (buffer, ';');
174 if (p)
175 *p = '\0';
176 }
177
178 out:
179 xmlFreeDoc (doc);
180 return buffer;
181 }
182
183 /* Display usage information and exit. */
184 static void
usage(int status)185 usage (int status)
186 {
187 if (status != EXIT_SUCCESS)
188 fprintf (stderr, _("Try '%s --help' for more information.\n"),
189 program_name);
190 else
191 {
192 printf (_("\
193 Usage: %s [OPTION...] [LOCALE RULES]...\n\
194 "), program_name);
195 printf ("\n");
196 /* xgettext: no-wrap */
197 printf (_("\
198 Extract or convert Unicode CLDR plural rules.\n\
199 \n\
200 If both LOCALE and RULES are specified, it reads CLDR plural rules for\n\
201 LOCALE from RULES and print them in a form suitable for gettext use.\n\
202 If no argument is given, it reads CLDR plural rules from the standard input.\n\
203 "));
204 printf ("\n");
205 /* xgettext: no-wrap */
206 printf (_("\
207 Mandatory arguments to long options are mandatory for short options too.\n\
208 Similarly for optional arguments.\n\
209 "));
210 printf ("\n");
211 printf (_("\
212 -c, --cldr print plural rules in the CLDR format\n"));
213 printf (_("\
214 -h, --help display this help and exit\n"));
215 printf (_("\
216 -V, --version output version information and exit\n"));
217 printf ("\n");
218 /* TRANSLATORS: The first placeholder is the web address of the Savannah
219 project of this package. The second placeholder is the bug-reporting
220 email address for this package. Please add _another line_ saying
221 "Report translation bugs to <...>\n" with the address for translation
222 bugs (typically your translation team's web or email address). */
223 printf(_("\
224 Report bugs in the bug tracker at <%s>\n\
225 or by email to <%s>.\n"),
226 "https://savannah.gnu.org/projects/gettext",
227 "bug-gettext@gnu.org");
228 }
229 exit (status);
230 }
231
/* Table of long options accepted by getopt_long.  Each entry maps to the
   equivalent short option character; the all-zero entry terminates it.  */
static const struct option long_options[] =
{
  { .name = "cldr",    .has_arg = no_argument, .flag = NULL, .val = 'c' },
  { .name = "help",    .has_arg = no_argument, .flag = NULL, .val = 'h' },
  { .name = "version", .has_arg = no_argument, .flag = NULL, .val = 'V' },
  { .name = NULL,      .has_arg = 0,           .flag = NULL, .val = 0 }
};
240
241 int
main(int argc,char ** argv)242 main (int argc, char **argv)
243 {
244 bool opt_cldr_format = false;
245 bool do_help = false;
246 bool do_version = false;
247 int optchar;
248
249 /* Set program name for messages. */
250 set_program_name (argv[0]);
251
252 /* Set locale via LC_ALL. */
253 setlocale (LC_ALL, "");
254
255 /* Set the text message domain. */
256 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
257 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
258 textdomain (PACKAGE);
259
260 /* Ensure that write errors on stdout are detected. */
261 atexit (close_stdout);
262
263 while ((optchar = getopt_long (argc, argv, "chV", long_options, NULL)) != EOF)
264 switch (optchar)
265 {
266 case '\0': /* Long option. */
267 break;
268
269 case 'c':
270 opt_cldr_format = true;
271 break;
272
273 case 'h':
274 do_help = true;
275 break;
276
277 case 'V':
278 do_version = true;
279 break;
280
281 default:
282 usage (EXIT_FAILURE);
283 /* NOTREACHED */
284 }
285
286 /* Version information requested. */
287 if (do_version)
288 {
289 printf ("%s (GNU %s) %s\n", last_component (program_name),
290 PACKAGE, VERSION);
291 /* xgettext: no-wrap */
292 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
293 License GPLv3+: GNU GPL version 3 or later <%s>\n\
294 This is free software: you are free to change and redistribute it.\n\
295 There is NO WARRANTY, to the extent permitted by law.\n\
296 "),
297 "2015-2020", "https://gnu.org/licenses/gpl.html");
298 printf (_("Written by %s.\n"), proper_name ("Daiki Ueno"));
299 exit (EXIT_SUCCESS);
300 }
301
302 /* Help is requested. */
303 if (do_help)
304 usage (EXIT_SUCCESS);
305
306 if (argc == optind + 2)
307 {
308 /* Two arguments: Read CLDR rules from a file. */
309 const char *locale = argv[optind];
310 const char *logical_filename = argv[optind + 1];
311 char *extracted_rules;
312 FILE *fp;
313
314 LIBXML_TEST_VERSION
315
316 fp = fopen (logical_filename, "r");
317 if (fp == NULL)
318 error (1, 0, _("%s cannot be read"), logical_filename);
319
320 extracted_rules = extract_rules (fp, logical_filename, logical_filename,
321 locale);
322 fclose (fp);
323 if (extracted_rules == NULL)
324 error (1, 0, _("cannot extract rules for %s"), locale);
325
326 if (opt_cldr_format)
327 printf ("%s\n", extracted_rules);
328 else
329 {
330 struct cldr_plural_rule_list_ty *result;
331
332 result = cldr_plural_parse (extracted_rules);
333 if (result == NULL)
334 error (1, 0, _("cannot parse CLDR rule"));
335
336 cldr_plural_rule_list_print (result, stdout);
337 cldr_plural_rule_list_free (result);
338 }
339 free (extracted_rules);
340 }
341 else if (argc == optind)
342 {
343 /* No argument: Read CLDR rules from standard input. */
344 char *line = NULL;
345 size_t line_size = 0;
346 for (;;)
347 {
348 int line_len;
349 struct cldr_plural_rule_list_ty *result;
350
351 line_len = getline (&line, &line_size, stdin);
352 if (line_len < 0)
353 break;
354 if (line_len > 0 && line[line_len - 1] == '\n')
355 line[--line_len] = '\0';
356
357 result = cldr_plural_parse (line);
358 if (result)
359 {
360 cldr_plural_rule_list_print (result, stdout);
361 cldr_plural_rule_list_free (result);
362 }
363 }
364
365 free (line);
366 }
367 else
368 {
369 error (1, 0, _("extra operand %s"), argv[optind]);
370 }
371
372 return 0;
373 }
374