1 /* Unicode CLDR plural rule parser and converter
2 Copyright (C) 2015, 2020 Free Software Foundation, Inc.
3
4 This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <https://www.gnu.org/licenses/>. */
18
19 %{
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include "unistr.h"
29 #include "xalloc.h"
30
31 #include "cldr-plural-exp.h"
32 #include "cldr-plural.h"
33
34 /* Prototypes for local functions. */
35 static int yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg);
36 static void yyerror (struct cldr_plural_parse_args *arg, const char *str);
37
38 /* Allocation of expressions. */
39
/* Allocates a rule object that pairs NAME with CONDITION.
   Both pointers are stored as given; nothing is copied.  */
static struct cldr_plural_rule_ty *
new_rule (char *name, struct cldr_plural_condition_ty *condition)
{
  struct cldr_plural_rule_ty *rule;

  rule = XMALLOC (struct cldr_plural_rule_ty);
  rule->condition = condition;
  rule->name = name;
  return rule;
}
49
/* Allocates a condition node of type CLDR_PLURAL_CONDITION_RELATION
   whose value is RELATION.  The pointer is stored as given.  */
static struct cldr_plural_condition_ty *
new_leaf_condition (struct cldr_plural_relation_ty *relation)
{
  struct cldr_plural_condition_ty *leaf;

  leaf = XMALLOC (struct cldr_plural_condition_ty);
  leaf->value.relation = relation;
  leaf->type = CLDR_PLURAL_CONDITION_RELATION;
  return leaf;
}
59
/* Allocates a condition node of the given TYPE with the two
   sub-conditions CONDITION0 and CONDITION1, stored in that order.  */
static struct cldr_plural_condition_ty *
new_branch_condition (enum cldr_plural_condition type,
                      struct cldr_plural_condition_ty *condition0,
                      struct cldr_plural_condition_ty *condition1)
{
  struct cldr_plural_condition_ty *branch;

  branch = XMALLOC (struct cldr_plural_condition_ty);
  branch->type = type;
  branch->value.conditions[1] = condition1;
  branch->value.conditions[0] = condition0;
  return branch;
}
72
/* Allocates a relation that compares EXPRESSION against RANGES.
   TYPE tells which kind of comparison it is.  The pointers are stored
   as given.  */
static struct cldr_plural_relation_ty *
new_relation (struct cldr_plural_expression_ty *expression,
              enum cldr_plural_relation type,
              struct cldr_plural_range_list_ty *ranges)
{
  struct cldr_plural_relation_ty *relation;

  relation = XMALLOC (struct cldr_plural_relation_ty);
  relation->type = type;
  relation->ranges = ranges;
  relation->expression = expression;
  return relation;
}
85
/* Allocates an expression holding OPERAND and the modulus MOD.
   The grammar passes 0 as MOD when no '%' part is present.  */
static struct cldr_plural_expression_ty *
new_expression (int operand, int mod)
{
  struct cldr_plural_expression_ty *expression;

  expression = XMALLOC (struct cldr_plural_expression_ty);
  expression->mod = mod;
  expression->operand = operand;
  return expression;
}
95
/* Appends RANGE to the list RANGES, enlarging the item array first when
   it is full.  Returns RANGES.  */
static struct cldr_plural_range_list_ty *
add_range (struct cldr_plural_range_list_ty *ranges,
           struct cldr_plural_range_ty *range)
{
  if (ranges->nitems == ranges->nitems_max)
    {
      /* No free slot left: grow the capacity to 2 * old + 1.  */
      ranges->nitems_max = 2 * ranges->nitems_max + 1;
      ranges->items =
        xrealloc (ranges->items,
                  ranges->nitems_max
                  * sizeof (struct cldr_plural_range_ty *));
    }

  ranges->items[ranges->nitems] = range;
  ranges->nitems++;
  return ranges;
}
110
/* Allocates a range delimited by START and END.  The pointers are stored
   as given; the grammar passes the same operand twice for a range that
   consists of a single integer.  */
static struct cldr_plural_range_ty *
new_range (struct cldr_plural_operand_ty *start,
           struct cldr_plural_operand_ty *end)
{
  struct cldr_plural_range_ty *range;

  range = XMALLOC (struct cldr_plural_range_ty);
  range->end = end;
  range->start = start;
  return range;
}
121 %}
122
%require "3.0"

/* Pure parser: the parse state is passed explicitly, both to yyparse
   and to yylex.  */
%define api.pure full
%parse-param {struct cldr_plural_parse_args *arg}
%lex-param {struct cldr_plural_parse_args *arg}

/* Semantic value types.  */
%union {
  char *sval;                                   /* KEYWORD */
  struct cldr_plural_condition_ty *cval;        /* condition, and_condition */
  struct cldr_plural_relation_ty *lval;         /* relation */
  struct cldr_plural_expression_ty *eval;       /* expression */
  struct cldr_plural_range_ty *gval;            /* range, range_or_integer */
  struct cldr_plural_operand_ty *oval;          /* INTEGER, DECIMAL */
  struct cldr_plural_range_list_ty *rval;       /* range_list */
  int ival;                                     /* OPERAND */
}

/* Cleanup of semantic values that the parser discards.  */
%destructor { free ($$); } <sval> <eval> <oval>
%destructor { cldr_plural_condition_free ($$); } <cval>
%destructor { cldr_plural_relation_free ($$); } <lval>
%destructor { cldr_plural_range_free ($$); } <gval>
%destructor { cldr_plural_range_list_free ($$); } <rval>
%destructor { } <ival>

/* Terminals.  */
%token AND OR RANGE ELLIPSIS OTHER AT_INTEGER AT_DECIMAL
%token<sval> KEYWORD
%token<oval> INTEGER DECIMAL
%token<ival> OPERAND

/* Nonterminals that carry a value.  */
%type<cval> condition and_condition
%type<lval> relation
%type<eval> expression
%type<gval> range range_or_integer
%type<rval> range_list
158
159 %%
160
/* A rule set is a list of rules separated by ';'.  */
rules: rule
        | rules ';' rule
        ;

/* A rule with a condition is appended to the rule list arg->result.
   The "other" rule has no condition and adds nothing to the list.  */
rule: KEYWORD[name] ':' condition[cond] samples
        {
          struct cldr_plural_rule_list_ty *list = arg->result;

          if (list->nitems == list->nitems_max)
            {
              /* The array is full: grow the capacity to 2 * old + 1.  */
              list->nitems_max = 2 * list->nitems_max + 1;
              list->items =
                xrealloc (list->items,
                          list->nitems_max
                          * sizeof (struct cldr_plural_rule_ty *));
            }

          list->items[list->nitems] = new_rule ($name, $cond);
          list->nitems++;
        }
        | OTHER ':' samples
        ;
180
/* A condition is a left-associative chain of and_conditions joined
   by OR.  */
condition: and_condition[single]
        {
          $$ = $single;
        }
        | condition[left] OR and_condition[right]
        {
          $$ = new_branch_condition (CLDR_PLURAL_CONDITION_OR,
                                     $left, $right);
        }
        ;

/* An and_condition is a left-associative chain of relations joined
   by AND.  Each relation is wrapped in a leaf condition node.  */
and_condition: relation[single]
        {
          $$ = new_leaf_condition ($single);
        }
        | and_condition[left] AND relation[right]
        {
          struct cldr_plural_condition_ty *leaf =
            new_leaf_condition ($right);

          $$ = new_branch_condition (CLDR_PLURAL_CONDITION_AND,
                                     $left, leaf);
        }
        ;
202
/* A relation tests an expression against a list of ranges.
   The '!' token stands for the two-character operator "!=", which the
   lexer reduces to a single token.  */
relation: expression[lhs] '=' range_list[ranges]
        {
          $$ = new_relation ($lhs, CLDR_PLURAL_RELATION_EQUAL, $ranges);
        }
        | expression[lhs] '!' range_list[ranges]
        {
          $$ = new_relation ($lhs, CLDR_PLURAL_RELATION_NOT_EQUAL,
                             $ranges);
        }
        ;
212
/* An expression is an operand letter, optionally followed by '%' and an
   integer modulus.  A modulus of 0 means that no modulus was given.  */
expression: OPERAND
        {
          $$ = new_expression ($1, 0);
        }
        | OPERAND '%' INTEGER
        {
          $$ = new_expression ($1, $3->value.ival);
          /* Only the integer value is kept.  The operand object allocated
             by the lexer is no longer referenced, and the destructor for
             its type does not run on a normal reduction, so release it
             here.  */
          free ($3);
        }
        ;
222
/* A comma-separated list of ranges, collected into one list object.  */
range_list: range_or_integer[first]
        {
          struct cldr_plural_range_list_ty *fresh;

          /* Start with an all-zero list, then append the first range.  */
          fresh = XMALLOC (struct cldr_plural_range_list_ty);
          memset (fresh, 0, sizeof *fresh);
          $$ = add_range (fresh, $first);
        }
        | range_list[sofar] ',' range_or_integer[next]
        {
          $$ = add_range ($sofar, $next);
        }
        ;
235
/* Either a real range, or a single integer.  A single integer becomes
   a range whose start and end are the very same operand object.  */
range_or_integer: range[r]
        {
          $$ = $r;
        }
        | INTEGER[value]
        {
          $$ = new_range ($value, $value);
        }
        ;

/* Two integers separated by the RANGE token ("..").  */
range: INTEGER[low] RANGE INTEGER[high]
        {
          $$ = new_range ($low, $high);
        }
        ;
251
/* FIXME: collect samples */
/* The optional sample lists are parsed and then thrown away: the
   operands that the lexer allocated for them are freed immediately.  */
samples: at_integer at_decimal
        ;

at_integer: %empty
        | AT_INTEGER sample_list
        ;

at_decimal: %empty
        | AT_DECIMAL sample_list
        ;

/* One or more sample ranges, optionally terminated by ", ELLIPSIS".  */
sample_list: sample_list1 sample_ellipsis
        ;

sample_list1: sample_range
        | sample_list1 ',' sample_range
        ;

sample_ellipsis: %empty
        | ',' ELLIPSIS
        ;

/* A single number, or two numbers of the same kind joined by '~'.  */
sample_range: DECIMAL[only]
        {
          free ($only);
        }
        | DECIMAL[from] '~' DECIMAL[to]
        {
          free ($from);
          free ($to);
        }
        | INTEGER[only]
        {
          free ($only);
        }
        | INTEGER[from] '~' INTEGER[to]
        {
          free ($from);
          free ($to);
        }
        ;
282
283 %%
284
/* Lexical analyzer called by the generated parser.

   Scans one token from the NUL-terminated text at ARG->cp, after skipping
   leading spaces and tabs, and advances ARG->cp past the token.

   Semantic values stored through LVAL:
     INTEGER, DECIMAL  LVAL->oval, a newly allocated operand
     OPERAND           LVAL->ival, the operand letter
     KEYWORD           LVAL->sval, a newly allocated copy of the word

   Returns YYEOF at the end of the text, otherwise a named token code or
   the character code of a single-character token.  The operator "!=" is
   returned as '!'; a '!' not followed by '=' yields YYERRCODE.

   The scratch buffer used for words lives in static variables and is
   reused from one call to the next.  */
static int
yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg)
{
  const char *exp = arg->cp;
  ucs4_t uc;
  int length;
  int result;
  static char *buffer;
  static size_t bufmax;
  size_t bufpos;

  /* Skip blanks; report the end of the text.  */
  while (1)
    {
      if (exp[0] == '\0')
        {
          arg->cp = exp;
          return YYEOF;
        }

      if (exp[0] != ' ' && exp[0] != '\t')
        break;

      ++exp;
    }

  /* Multi-character punctuation and fixed words.  These are plain prefix
     comparisons, tried in this order, so "..." wins over "..".  */
  length = u8_mbtouc (&uc, (const uint8_t *) exp, arg->cp_end - exp);
  if (uc == 0x2026)
    {
      /* U+2026, the single-character ellipsis.  */
      arg->cp = exp + length;
      return ELLIPSIS;
    }
  else if (strncmp ("...", exp, 3) == 0)
    {
      arg->cp = exp + 3;
      return ELLIPSIS;
    }
  else if (strncmp ("..", exp, 2) == 0)
    {
      arg->cp = exp + 2;
      return RANGE;
    }
  else if (strncmp ("other", exp, 5) == 0)
    {
      arg->cp = exp + 5;
      return OTHER;
    }
  else if (strncmp ("@integer", exp, 8) == 0)
    {
      arg->cp = exp + 8;
      return AT_INTEGER;
    }
  else if (strncmp ("@decimal", exp, 8) == 0)
    {
      arg->cp = exp + 8;
      return AT_DECIMAL;
    }

  /* Go through unsigned char, so that a byte >= 0x80 never turns into a
     negative token code on platforms where plain char is signed.  */
  result = (unsigned char) *exp++;
  switch (result)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      {
        /* A number: digits, optionally followed by '.' and more digits.  */
        unsigned long int ival = result - '0';

        while (exp[0] >= '0' && exp[0] <= '9')
          {
            ival *= 10;
            ival += exp[0] - '0';
            ++exp;
          }

        lval->oval = XMALLOC (struct cldr_plural_operand_ty);
        if (exp[0] == '.' && exp[1] >= '0' && exp[1] <= '9')
          {
            double dval = ival;
            /* Kept as a double: an int would overflow once the number
               has nine or more fractional digits.  */
            double denominator = 10.0;
            int nfractions = 0;

            ++exp;
            while (exp[0] >= '0' && exp[0] <= '9')
              {
                dval += (exp[0] - '0') / denominator;
                denominator *= 10.0;
                ++nfractions;
                ++exp;
              }
            lval->oval->type = CLDR_PLURAL_OPERAND_DECIMAL;
            lval->oval->value.dval.d = dval;
            lval->oval->value.dval.nfractions = nfractions;
            result = DECIMAL;
          }
        else
          {
            lval->oval->type = CLDR_PLURAL_OPERAND_INTEGER;
            lval->oval->value.ival = ival;
            result = INTEGER;
          }
      }
      break;
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      /* A word: collect the run of lowercase ASCII letters in BUFFER.  */
      bufpos = 0;
      for (;;)
        {
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos++] = result;
          result = *exp;
          switch (result)
            {
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
              ++exp;
              continue;
            default:
              break;
            }
          break;
        }

      /* Make room for the terminating NUL.  */
      if (bufpos >= bufmax)
        {
          bufmax = 2 * bufmax + 10;
          buffer = xrealloc (buffer, bufmax);
        }
      buffer[bufpos] = '\0';

      /* Operands.  */
      if (bufpos == 1)
        {
          switch (buffer[0])
            {
            case 'n': case 'i': case 'f': case 't': case 'v': case 'w':
              arg->cp = exp;
              lval->ival = buffer[0];
              return OPERAND;
            default:
              break;
            }
        }

      /* Keywords.  */
      if (strcmp (buffer, "and") == 0)
        {
          arg->cp = exp;
          return AND;
        }
      else if (strcmp (buffer, "or") == 0)
        {
          arg->cp = exp;
          return OR;
        }

      /* Any other word is a rule name.  */
      lval->sval = xstrdup (buffer);
      result = KEYWORD;
      break;
    case '!':
      if (exp[0] == '=')
        {
          ++exp;
          result = '!';
        }
      else
        result = YYERRCODE;
      break;
    default:
      /* Any other character is returned as its own token code.  */
      break;
    }

  arg->cp = exp;

  return result;
}
466
/* Error callback of the generated parser: writes the message S, followed
   by a newline, to standard error.  ARG is not used.  */
static void
yyerror (struct cldr_plural_parse_args *arg, char const *s)
{
  (void) arg;
  fprintf (stderr, "%s\n", s);
}
472