• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Unicode CLDR plural rule parser and converter
2    Copyright (C) 2015, 2020 Free Software Foundation, Inc.
3 
4    This file was written by Daiki Ueno <ueno@gnu.org>, 2015.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 %{
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include "unistr.h"
29 #include "xalloc.h"
30 
31 #include "cldr-plural-exp.h"
32 #include "cldr-plural.h"
33 
34 /* Prototypes for local functions.  */
35 static int yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg);
36 static void yyerror (struct cldr_plural_parse_args *arg, const char *str);
37 
38 /* Allocation of expressions.  */
39 
40 static struct cldr_plural_rule_ty *
new_rule(char * name,struct cldr_plural_condition_ty * condition)41 new_rule (char *name, struct cldr_plural_condition_ty *condition)
42 {
43   struct cldr_plural_rule_ty *result =
44     XMALLOC (struct cldr_plural_rule_ty);
45   result->name = name;
46   result->condition = condition;
47   return result;
48 }
49 
50 static struct cldr_plural_condition_ty *
new_leaf_condition(struct cldr_plural_relation_ty * relation)51 new_leaf_condition (struct cldr_plural_relation_ty *relation)
52 {
53   struct cldr_plural_condition_ty *result =
54     XMALLOC (struct cldr_plural_condition_ty);
55   result->type = CLDR_PLURAL_CONDITION_RELATION;
56   result->value.relation = relation;
57   return result;
58 }
59 
60 static struct cldr_plural_condition_ty *
new_branch_condition(enum cldr_plural_condition type,struct cldr_plural_condition_ty * condition0,struct cldr_plural_condition_ty * condition1)61 new_branch_condition (enum cldr_plural_condition type,
62                       struct cldr_plural_condition_ty *condition0,
63                       struct cldr_plural_condition_ty *condition1)
64 {
65   struct cldr_plural_condition_ty *result =
66     XMALLOC (struct cldr_plural_condition_ty);
67   result->type = type;
68   result->value.conditions[0] = condition0;
69   result->value.conditions[1] = condition1;
70   return result;
71 }
72 
73 static struct cldr_plural_relation_ty *
new_relation(struct cldr_plural_expression_ty * expression,enum cldr_plural_relation type,struct cldr_plural_range_list_ty * ranges)74 new_relation (struct cldr_plural_expression_ty *expression,
75               enum cldr_plural_relation type,
76               struct cldr_plural_range_list_ty *ranges)
77 {
78   struct cldr_plural_relation_ty *result =
79     XMALLOC (struct cldr_plural_relation_ty);
80   result->expression = expression;
81   result->type = type;
82   result->ranges = ranges;
83   return result;
84 }
85 
86 static struct cldr_plural_expression_ty *
new_expression(int operand,int mod)87 new_expression (int operand, int mod)
88 {
89   struct cldr_plural_expression_ty *result =
90     XMALLOC (struct cldr_plural_expression_ty);
91   result->operand = operand;
92   result->mod = mod;
93   return result;
94 }
95 
96 static struct cldr_plural_range_list_ty *
add_range(struct cldr_plural_range_list_ty * ranges,struct cldr_plural_range_ty * range)97 add_range (struct cldr_plural_range_list_ty *ranges,
98            struct cldr_plural_range_ty *range)
99 {
100   if (ranges->nitems == ranges->nitems_max)
101     {
102       ranges->nitems_max = ranges->nitems_max * 2 + 1;
103       ranges->items = xrealloc (ranges->items,
104                                 sizeof (struct cldr_plural_range_ty *)
105                                 * ranges->nitems_max);
106     }
107   ranges->items[ranges->nitems++] = range;
108   return ranges;
109 }
110 
111 static struct cldr_plural_range_ty *
new_range(struct cldr_plural_operand_ty * start,struct cldr_plural_operand_ty * end)112 new_range (struct cldr_plural_operand_ty *start,
113            struct cldr_plural_operand_ty *end)
114 {
115   struct cldr_plural_range_ty *result =
116     XMALLOC (struct cldr_plural_range_ty);
117   result->start = start;
118   result->end = end;
119   return result;
120 }
121 %}
122 
/* Require Bison 3.0 or newer.  */
%require "3.0"

/* The parse state ARG is passed to the parser and forwarded to yylex.  */
%parse-param {struct cldr_plural_parse_args *arg}
%lex-param {struct cldr_plural_parse_args *arg}
%define api.pure full

/* Semantic value types.  */
%union {
  char *sval;                                   /* KEYWORD */
  struct cldr_plural_condition_ty *cval;        /* condition, and_condition */
  struct cldr_plural_relation_ty *lval;         /* relation */
  struct cldr_plural_expression_ty *eval;       /* expression */
  struct cldr_plural_range_ty *gval;            /* range, range_or_integer */
  struct cldr_plural_operand_ty *oval;          /* INTEGER, DECIMAL */
  struct cldr_plural_range_list_ty *rval;       /* range_list */
  int ival;                                     /* OPERAND */
}

/* Cleanup of semantic values that the parser discards.  */
%destructor { free ($$); } <sval> <eval> <oval>
%destructor { cldr_plural_condition_free ($$); } <cval>
%destructor { cldr_plural_relation_free ($$); } <lval>
%destructor { cldr_plural_range_free ($$); } <gval>
%destructor { cldr_plural_range_list_free ($$); } <rval>
%destructor { } <ival>

/* Tokens produced by yylex.  */
%token AND OR RANGE ELLIPSIS OTHER AT_INTEGER AT_DECIMAL
%token <sval> KEYWORD
%token <oval> INTEGER DECIMAL
%token <ival> OPERAND

/* Types of the nonterminals that carry a value.  */
%type <cval> condition and_condition
%type <lval> relation
%type <eval> expression
%type <gval> range range_or_integer
%type <rval> range_list
159 %%
160 
/* One or more rules, separated by ';'.  */
rules:
          rule
        | rules ';' rule
        ;

/* A keyword rule is appended to the rule list in ARG->result.  The
   "other" rule has no condition and nothing is recorded for it.  */
rule:
          KEYWORD ':' condition samples
        {
          struct cldr_plural_rule_list_ty *list = arg->result;
          struct cldr_plural_rule_ty *entry = new_rule ($1, $3);

          /* Enlarge the item array when every slot is in use.  */
          if (list->nitems_max == list->nitems)
            {
              list->nitems_max = 2 * list->nitems_max + 1;
              list->items = xrealloc (list->items,
                                      list->nitems_max
                                      * sizeof (struct cldr_plural_rule_ty *));
            }
          list->items[list->nitems] = entry;
          list->nitems++;
        }
        | OTHER ':' samples
        ;
180 
/* A condition is a chain of and_conditions joined by OR; the chain is
   built as a left-leaning tree of CLDR_PLURAL_CONDITION_OR nodes.  */
condition:
          and_condition
        {
          $$ = $1;
        }
        | condition OR and_condition
        {
          $$ = new_branch_condition (CLDR_PLURAL_CONDITION_OR, $1, $3);
        }
        ;
190 
/* An and_condition is a chain of relations joined by AND.  Each relation
   is wrapped in a leaf node; the chain is built as a left-leaning tree of
   CLDR_PLURAL_CONDITION_AND nodes.  */
and_condition:
          relation
        {
          $$ = new_leaf_condition ($1);
        }
        | and_condition AND relation
        {
          struct cldr_plural_condition_ty *leaf = new_leaf_condition ($3);
          $$ = new_branch_condition (CLDR_PLURAL_CONDITION_AND, $1, leaf);
        }
        ;
202 
/* A relation compares an expression against a list of ranges.  The lexer
   delivers the two-character operator "!=" as the token '!'.  */
relation:
          expression '=' range_list
        {
          $$ = new_relation ($1, CLDR_PLURAL_RELATION_EQUAL, $3);
        }
        | expression '!' range_list
        {
          $$ = new_relation ($1, CLDR_PLURAL_RELATION_NOT_EQUAL, $3);
        }
        ;
212 
/* An expression is an operand, optionally followed by '%' and an integer
   modulus.  A plain operand is stored with a modulus of 0.  */
expression:
          OPERAND
        {
          $$ = new_expression ($1, 0);
        }
        | OPERAND '%' INTEGER
        {
          $$ = new_expression ($1, $3->value.ival);
          /* Only the integer value is kept.  The operand object allocated
             by the lexer is not referenced anywhere else, so release it
             here; otherwise it would leak.  */
          free ($3);
        }
        ;
222 
/* A comma-separated list of ranges.  The first element creates a new,
   zero-initialized list; every further element is appended to it.  */
range_list:
          range_or_integer
        {
          struct cldr_plural_range_list_ty *list =
            XMALLOC (struct cldr_plural_range_list_ty);

          memset (list, 0, sizeof *list);
          $$ = add_range (list, $1);
        }
        | range_list ',' range_or_integer
        {
          $$ = add_range ($1, $3);
        }
        ;
235 
/* Either a real range or a single integer.  A single integer becomes a
   range whose start and end are the very same operand object.  */
range_or_integer:
          range
        {
          $$ = $1;
        }
        | INTEGER
        {
          $$ = new_range ($1, $1);
        }
        ;
245 
/* Two integers joined by the RANGE token ("..").  */
range:
          INTEGER RANGE INTEGER
        {
          $$ = new_range ($1, $3);
        }
        ;
251 
/* FIXME: collect samples */
/* Samples are parsed but not recorded: the operands delivered by the
   lexer are released as soon as a sample has been recognized.  */
samples:
          at_integer at_decimal
        ;

at_integer:
          %empty
        | AT_INTEGER sample_list
        ;

at_decimal:
          %empty
        | AT_DECIMAL sample_list
        ;

/* One or more comma-separated samples, optionally ending in ", ...".  */
sample_list:
          sample_list1 sample_ellipsis
        ;

sample_list1:
          sample_range
        | sample_list1 ',' sample_range
        ;

sample_ellipsis:
          %empty
        | ',' ELLIPSIS
        ;

/* A single value, or two values of the same kind joined by '~'.  */
sample_range:
          DECIMAL
        {
          free ($1);
        }
        | DECIMAL '~' DECIMAL
        {
          free ($1);
          free ($3);
        }
        | INTEGER
        {
          free ($1);
        }
        | INTEGER '~' INTEGER
        {
          free ($1);
          free ($3);
        }
        ;
282 
283 %%
284 
285 static int
286 yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg)
287 {
288   const char *exp = arg->cp;
289   ucs4_t uc;
290   int length;
291   int result;
292   static char *buffer;
293   static size_t bufmax;
294   size_t bufpos;
295 
296   while (1)
297     {
298       if (exp[0] == '\0')
299         {
300           arg->cp = exp;
301           return YYEOF;
302         }
303 
304       if (exp[0] != ' ' && exp[0] != '\t')
305         break;
306 
307       ++exp;
308     }
309 
310   length = u8_mbtouc (&uc, (const uint8_t *) exp, arg->cp_end - exp);
311   if (uc == 0x2026)
312     {
313       arg->cp = exp + length;
314       return ELLIPSIS;
315     }
316   else if (strncmp ("...", exp, 3) == 0)
317     {
318       arg->cp = exp + 3;
319       return ELLIPSIS;
320     }
321   else if (strncmp ("..", exp, 2) == 0)
322     {
323       arg->cp = exp + 2;
324       return RANGE;
325     }
326   else if (strncmp ("other", exp, 5) == 0)
327     {
328       arg->cp = exp + 5;
329       return OTHER;
330     }
331   else if (strncmp ("@integer", exp, 8) == 0)
332     {
333       arg->cp = exp + 8;
334       return AT_INTEGER;
335     }
336   else if (strncmp ("@decimal", exp, 8) == 0)
337     {
338       arg->cp = exp + 8;
339       return AT_DECIMAL;
340     }
341 
342   result = *exp++;
343   switch (result)
344     {
345     case '0': case '1': case '2': case '3': case '4':
346     case '5': case '6': case '7': case '8': case '9':
347       {
348         unsigned long int ival = result - '0';
349 
350         while (exp[0] >= '0' && exp[0] <= '9')
351           {
352             ival *= 10;
353             ival += exp[0] - '0';
354             ++exp;
355           }
356 
357         lval->oval = XMALLOC (struct cldr_plural_operand_ty);
358         if (exp[0] == '.' && exp[1] >= '0' && exp[1] <= '9')
359           {
360             double dval = ival;
361             int denominator = 10, nfractions = 0;
362             ++exp;
363             while (exp[0] >= '0' && exp[0] <= '9')
364               {
365                 dval += (exp[0] - '0') / (double) denominator;
366                 denominator *= 10;
367                 ++nfractions;
368                 ++exp;
369               }
370             lval->oval->type = CLDR_PLURAL_OPERAND_DECIMAL;
371             lval->oval->value.dval.d = dval;
372             lval->oval->value.dval.nfractions = nfractions;
373             result = DECIMAL;
374           }
375         else
376           {
377             lval->oval->type = CLDR_PLURAL_OPERAND_INTEGER;
378             lval->oval->value.ival = ival;
379             result = INTEGER;
380           }
381       }
382       break;
383     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
384     case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
385     case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
386     case 'v': case 'w': case 'x': case 'y': case 'z':
387       bufpos = 0;
388       for (;;)
389         {
390           if (bufpos >= bufmax)
391             {
392               bufmax = 2 * bufmax + 10;
393               buffer = xrealloc (buffer, bufmax);
394             }
395           buffer[bufpos++] = result;
396           result = *exp;
397           switch (result)
398             {
399             case 'a': case 'b': case 'c': case 'd': case 'e':
400             case 'f': case 'g': case 'h': case 'i': case 'j':
401             case 'k': case 'l': case 'm': case 'n': case 'o':
402             case 'p': case 'q': case 'r': case 's': case 't':
403             case 'u': case 'v': case 'w': case 'x': case 'y':
404             case 'z':
405               ++exp;
406               continue;
407             default:
408               break;
409             }
410           break;
411         }
412 
413       if (bufpos >= bufmax)
414         {
415           bufmax = 2 * bufmax + 10;
416           buffer = xrealloc (buffer, bufmax);
417         }
418       buffer[bufpos] = '\0';
419 
420       /* Operands.  */
421       if (bufpos == 1)
422         {
423           switch (buffer[0])
424             {
425             case 'n': case 'i': case 'f': case 't': case 'v': case 'w':
426               arg->cp = exp;
427               lval->ival = buffer[0];
428               return OPERAND;
429             default:
430               break;
431             }
432         }
433 
434       /* Keywords.  */
435       if (strcmp (buffer, "and") == 0)
436         {
437           arg->cp = exp;
438           return AND;
439         }
440       else if (strcmp (buffer, "or") == 0)
441         {
442           arg->cp = exp;
443           return OR;
444         }
445 
446       lval->sval = xstrdup (buffer);
447       result = KEYWORD;
448       break;
449     case '!':
450       if (exp[0] == '=')
451         {
452           ++exp;
453           result = '!';
454         }
455       else
456         result = YYERRCODE;
457       break;
458     default:
459       break;
460     }
461 
462   arg->cp = exp;
463 
464   return result;
465 }
466 
/* Write the parser's diagnostic S to standard error, followed by a
   newline.  ARG is not used.  */
static void
yyerror (struct cldr_plural_parse_args *arg, char const *s)
{
  (void) arg;
  fputs (s, stderr);
  putc ('\n', stderr);
}
472