• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* xgettext awk backend.
2    Copyright (C) 2002-2003, 2005-2009, 2018-2019 Free Software Foundation, Inc.
3 
4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 
23 /* Specification.  */
24 #include "x-awk.h"
25 
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include "message.h"
33 #include "xgettext.h"
34 #include "xg-pos.h"
35 #include "xg-mixed-string.h"
36 #include "xg-arglist-context.h"
37 #include "xg-arglist-callshape.h"
38 #include "xg-arglist-parser.h"
39 #include "xg-message.h"
40 #include "error.h"
41 #include "error-progname.h"
42 #include "xalloc.h"
43 #include "gettext.h"
44 
45 #define _(s) gettext(s)
46 
47 
48 /* The awk syntax is defined in the gawk manual page and documentation.
49    See also gawk/awkgram.y.  */
50 
51 
52 /* ====================== Keyword set customization.  ====================== */
53 
54 /* If true extract all strings.  */
55 static bool extract_all = false;
56 
57 static hash_table keywords;
58 static bool default_keywords = true;
59 
60 
61 void
x_awk_extract_all()62 x_awk_extract_all ()
63 {
64   extract_all = true;
65 }
66 
67 
68 void
x_awk_keyword(const char * name)69 x_awk_keyword (const char *name)
70 {
71   if (name == NULL)
72     default_keywords = false;
73   else
74     {
75       const char *end;
76       struct callshape shape;
77       const char *colon;
78 
79       if (keywords.table == NULL)
80         hash_init (&keywords, 100);
81 
82       split_keywordspec (name, &end, &shape);
83 
84       /* The characters between name and end should form a valid C identifier.
85          A colon means an invalid parse in split_keywordspec().  */
86       colon = strchr (name, ':');
87       if (colon == NULL || colon >= end)
88         insert_keyword_callshape (&keywords, name, end - name, &shape);
89     }
90 }
91 
92 /* Finish initializing the keywords hash table.
93    Called after argument processing, before each file is processed.  */
94 static void
init_keywords()95 init_keywords ()
96 {
97   if (default_keywords)
98     {
99       /* When adding new keywords here, also update the documentation in
100          xgettext.texi!  */
101       x_awk_keyword ("dcgettext");
102       x_awk_keyword ("dcngettext:1,2");
103       default_keywords = false;
104     }
105 }
106 
/* Record the format-string flags of the awk functions known to this
   backend.  Each entry is passed verbatim to xgettext_record_flag, in
   the order listed.  */
void
init_flag_table_awk ()
{
  static const char *const flag_specs[] =
    {
      "dcgettext:1:pass-awk-format",
      "dcngettext:1:pass-awk-format",
      "dcngettext:2:pass-awk-format",
      "printf:1:awk-format"
    };
  size_t i;

  for (i = 0; i < sizeof flag_specs / sizeof flag_specs[0]; i++)
    xgettext_record_flag (flag_specs[i]);
}
115 
116 
117 /* ======================== Reading of characters.  ======================== */
118 
119 /* The input file stream.  */
120 static FILE *fp;
121 
122 /* These are for tracking whether comments count as immediately before
123    keyword.  */
124 static int last_comment_line;
125 static int last_non_comment_line;
126 
127 
128 /* 1. line_number handling.  */
129 
130 static int
phase1_getc()131 phase1_getc ()
132 {
133   int c = getc (fp);
134 
135   if (c == EOF)
136     {
137       if (ferror (fp))
138         error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
139                real_file_name);
140       return EOF;
141     }
142 
143   if (c == '\n')
144     line_number++;
145 
146   return c;
147 }
148 
149 /* Supports only one pushback character.  */
150 static void
phase1_ungetc(int c)151 phase1_ungetc (int c)
152 {
153   if (c != EOF)
154     {
155       if (c == '\n')
156         --line_number;
157 
158       ungetc (c, fp);
159     }
160 }
161 
162 
163 /* 2. Replace each comment that is not inside a string literal or regular
164    expression with a newline character.  We need to remember the comment
165    for later, because it may be attached to a keyword string.  */
166 
167 static int
phase2_getc()168 phase2_getc ()
169 {
170   static char *buffer;
171   static size_t bufmax;
172   size_t buflen;
173   int lineno;
174   int c;
175 
176   c = phase1_getc ();
177   if (c == '#')
178     {
179       buflen = 0;
180       lineno = line_number;
181       for (;;)
182         {
183           c = phase1_getc ();
184           if (c == '\n' || c == EOF)
185             break;
186           /* We skip all leading white space, but not EOLs.  */
187           if (!(buflen == 0 && (c == ' ' || c == '\t')))
188             {
189               if (buflen >= bufmax)
190                 {
191                   bufmax = 2 * bufmax + 10;
192                   buffer = xrealloc (buffer, bufmax);
193                 }
194               buffer[buflen++] = c;
195             }
196         }
197       if (buflen >= bufmax)
198         {
199           bufmax = 2 * bufmax + 10;
200           buffer = xrealloc (buffer, bufmax);
201         }
202       buffer[buflen] = '\0';
203       savable_comment_add (buffer);
204       last_comment_line = lineno;
205     }
206   return c;
207 }
208 
/* Push C back for the next phase2_getc call; EOF is ignored.
   Supports only one pushback character.  */
static void
phase2_ungetc (int c)
{
  if (c == EOF)
    return;

  phase1_ungetc (c);
}
216 
217 
218 /* ========================== Reading of tokens.  ========================== */
219 
220 
/* Kinds of tokens produced by the lexer.  */
typedef enum token_type_ty
{
  token_type_eof,
  token_type_lparen,            /* ( */
  token_type_rparen,            /* ) */
  token_type_comma,             /* , */
  token_type_string,            /* "abc" */
  token_type_i18nstring,        /* _"abc" */
  token_type_symbol,            /* symbol, number */
  token_type_semicolon,         /* ; */
  token_type_other              /* regexp, misc. operator */
} token_type_ty;

/* A single token.  */
typedef struct token_ty
{
  token_type_ty type;
  /* Text of the token; only meaningful for
     token_type_{symbol,string,i18nstring}.  */
  char *string;
  int line_number;
} token_ty;
242 
243 
244 /* 7. Replace escape sequences within character strings with their
245    single character equivalents.  */
246 
247 #define P7_QUOTES (1000 + '"')
248 
249 static int
phase7_getc()250 phase7_getc ()
251 {
252   int c;
253 
254   for (;;)
255     {
256       /* Use phase 1, because phase 2 elides comments.  */
257       c = phase1_getc ();
258 
259       if (c == EOF || c == '\n')
260         break;
261       if (c == '"')
262         return P7_QUOTES;
263       if (c != '\\')
264         return c;
265       c = phase1_getc ();
266       if (c == EOF)
267         break;
268       if (c != '\n')
269         switch (c)
270           {
271           case 'a':
272             return '\a';
273           case 'b':
274             return '\b';
275           case 'f':
276             return '\f';
277           case 'n':
278             return '\n';
279           case 'r':
280             return '\r';
281           case 't':
282             return '\t';
283           case 'v':
284             return '\v';
285           case '0': case '1': case '2': case '3': case '4':
286           case '5': case '6': case '7':
287             {
288               int n = c - '0';
289 
290               c = phase1_getc ();
291               if (c != EOF)
292                 {
293                   if (c >= '0' && c <= '7')
294                     {
295                       n = (n << 3) + (c - '0');
296                       c = phase1_getc ();
297                       if (c != EOF)
298                         {
299                           if (c >= '0' && c <= '7')
300                             n = (n << 3) + (c - '0');
301                           else
302                             phase1_ungetc (c);
303                         }
304                     }
305                   else
306                     phase1_ungetc (c);
307                 }
308               return (unsigned char) n;
309             }
310           case 'x':
311             {
312               int n = 0;
313 
314               for (;;)
315                 {
316                   c = phase1_getc ();
317                   if (c == EOF)
318                     break;
319                   else if (c >= '0' && c <= '9')
320                     n = (n << 4) + (c - '0');
321                   else if (c >= 'A' && c <= 'F')
322                     n = (n << 4) + (c - 'A' + 10);
323                   else if (c >= 'a' && c <= 'f')
324                     n = (n << 4) + (c - 'a' + 10);
325                   else
326                     {
327                       phase1_ungetc (c);
328                       break;
329                     }
330                 }
331               return (unsigned char) n;
332             }
333           default:
334             return c;
335           }
336     }
337 
338   phase1_ungetc (c);
339   error_with_progname = false;
340   error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
341          line_number);
342   error_with_progname = true;
343   return P7_QUOTES;
344 }
345 
346 
347 /* Free the memory pointed to by a 'struct token_ty'.  */
348 static inline void
free_token(token_ty * tp)349 free_token (token_ty *tp)
350 {
351   switch (tp->type)
352     {
353     case token_type_string:
354     case token_type_i18nstring:
355     case token_type_symbol:
356       free (tp->string);
357       break;
358     default:
359       break;
360     }
361 }
362 
363 
364 /* Combine characters into tokens.  Discard whitespace.  */
365 
366 /* There is an ambiguity about '/': It can start a division operator ('/' or
367    '/=') or it can start a regular expression.  The distinction is important
368    because inside regular expressions, '#' and '"' lose its special meanings.
369    If you look at the awk grammar, you see that the operator is only allowed
370    right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
371    can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
372    So we prefer the division operator interpretation only right after
373    symbol, string, number, ')', ']', with whitespace but no newline allowed
374    in between.  */
375 static bool prefer_division_over_regexp;
376 
377 static void
x_awk_lex(token_ty * tp)378 x_awk_lex (token_ty *tp)
379 {
380   static char *buffer;
381   static int bufmax;
382   int bufpos;
383   int c;
384 
385   for (;;)
386     {
387       tp->line_number = line_number;
388       c = phase2_getc ();
389 
390       switch (c)
391         {
392         case EOF:
393           tp->type = token_type_eof;
394           return;
395 
396         case '\n':
397           if (last_non_comment_line > last_comment_line)
398             savable_comment_reset ();
399           /* Newline is not allowed inside expressions.  It usually
400              introduces a fresh statement.
401              FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
402              does *not* introduce a fresh statement.  */
403           prefer_division_over_regexp = false;
404           /* FALLTHROUGH */
405         case '\t':
406         case ' ':
407           /* Ignore whitespace and comments.  */
408           continue;
409 
410         case '\\':
411           /* Backslash ought to be immediately followed by a newline.  */
412           continue;
413         }
414 
415       last_non_comment_line = tp->line_number;
416 
417       switch (c)
418         {
419         case '.':
420           {
421             int c2 = phase2_getc ();
422             phase2_ungetc (c2);
423             if (!(c2 >= '0' && c2 <= '9'))
424               {
425 
426                 tp->type = token_type_other;
427                 prefer_division_over_regexp = false;
428                 return;
429               }
430           }
431           /* FALLTHROUGH */
432         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
433         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
434         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
435         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
436         case 'Y': case 'Z':
437         case '_':
438         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
439         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
440         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
441         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
442         case 'y': case 'z':
443         case '0': case '1': case '2': case '3': case '4':
444         case '5': case '6': case '7': case '8': case '9':
445           /* Symbol, or part of a number.  */
446           bufpos = 0;
447           for (;;)
448             {
449               if (bufpos >= bufmax)
450                 {
451                   bufmax = 2 * bufmax + 10;
452                   buffer = xrealloc (buffer, bufmax);
453                 }
454               buffer[bufpos++] = c;
455               c = phase2_getc ();
456               switch (c)
457                 {
458                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
459                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
460                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
461                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
462                 case 'Y': case 'Z':
463                 case '_':
464                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
465                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
466                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
467                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
468                 case 'y': case 'z':
469                 case '0': case '1': case '2': case '3': case '4':
470                 case '5': case '6': case '7': case '8': case '9':
471                   continue;
472                 default:
473                   if (bufpos == 1 && buffer[0] == '_' && c == '"')
474                     {
475                       tp->type = token_type_i18nstring;
476                       goto case_string;
477                     }
478                   phase2_ungetc (c);
479                   break;
480                 }
481               break;
482             }
483           if (bufpos >= bufmax)
484             {
485               bufmax = 2 * bufmax + 10;
486               buffer = xrealloc (buffer, bufmax);
487             }
488           buffer[bufpos] = '\0';
489           tp->string = xstrdup (buffer);
490           tp->type = token_type_symbol;
491           /* Most identifiers can be variable names; after them we must
492              interpret '/' as division operator.  But for awk's builtin
493              keywords we have three cases:
494              (a) Must interpret '/' as division operator. "length".
495              (b) Must interpret '/' as start of a regular expression.
496                  "do", "exit", "print", "printf", "return".
497              (c) '/' after this keyword in invalid anyway. All others.
498              I used the following script for the distinction.
499                 for k in $awk_keywords; do
500                   echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
501                 done
502            */
503           if (strcmp (buffer, "do") == 0
504               || strcmp (buffer, "exit") == 0
505               || strcmp (buffer, "print") == 0
506               || strcmp (buffer, "printf") == 0
507               || strcmp (buffer, "return") == 0)
508             prefer_division_over_regexp = false;
509           else
510             prefer_division_over_regexp = true;
511           return;
512 
513         case '"':
514           tp->type = token_type_string;
515         case_string:
516           bufpos = 0;
517           for (;;)
518             {
519               c = phase7_getc ();
520               if (c == EOF || c == P7_QUOTES)
521                 break;
522               if (bufpos >= bufmax)
523                 {
524                   bufmax = 2 * bufmax + 10;
525                   buffer = xrealloc (buffer, bufmax);
526                 }
527               buffer[bufpos++] = c;
528             }
529           if (bufpos >= bufmax)
530             {
531               bufmax = 2 * bufmax + 10;
532               buffer = xrealloc (buffer, bufmax);
533             }
534           buffer[bufpos] = '\0';
535           tp->string = xstrdup (buffer);
536           prefer_division_over_regexp = true;
537           return;
538 
539         case '(':
540           tp->type = token_type_lparen;
541           prefer_division_over_regexp = false;
542           return;
543 
544         case ')':
545           tp->type = token_type_rparen;
546           prefer_division_over_regexp = true;
547           return;
548 
549         case ',':
550           tp->type = token_type_comma;
551           prefer_division_over_regexp = false;
552           return;
553 
554         case ';':
555           tp->type = token_type_semicolon;
556           prefer_division_over_regexp = false;
557           return;
558 
559         case ']':
560           tp->type = token_type_other;
561           prefer_division_over_regexp = true;
562           return;
563 
564         case '/':
565           if (!prefer_division_over_regexp)
566             {
567               /* Regular expression.
568                  Counting brackets is non-trivial. [[] is balanced, and so is
569                  [\]]. Also, /[/]/ is balanced and ends at the third slash.
570                  Do not count [ or ] if either one is preceded by a \.
571                  A '[' should be counted if
572                   a) it is the first one so far (brackets == 0), or
573                   b) it is the '[' in '[:'.
574                  A ']' should be counted if not preceded by a \.
575                  According to POSIX, []] is how you put a ] into a set.
576                  Try to handle that too.
577                */
578               int brackets = 0;
579               bool pos0 = true;         /* true at start of regexp */
580               bool pos1_open = false;   /* true after [ at start of regexp */
581               bool pos2_open_not = false; /* true after [^ at start of regexp */
582 
583               for (;;)
584                 {
585                   c = phase1_getc ();
586 
587                   if (c == EOF || c == '\n')
588                     {
589                       phase1_ungetc (c);
590                       error_with_progname = false;
591                       error (0, 0, _("%s:%d: warning: unterminated regular expression"),
592                              logical_file_name, line_number);
593                       error_with_progname = true;
594                       break;
595                     }
596                   else if (c == '[')
597                     {
598                       if (brackets == 0)
599                         brackets++;
600                       else
601                         {
602                           c = phase1_getc ();
603                           if (c == ':')
604                             brackets++;
605                           phase1_ungetc (c);
606                         }
607                       if (pos0)
608                         {
609                           pos0 = false;
610                           pos1_open = true;
611                           continue;
612                         }
613                     }
614                   else if (c == ']')
615                     {
616                       if (!(pos1_open || pos2_open_not))
617                         brackets--;
618                     }
619                   else if (c == '^')
620                     {
621                       if (pos1_open)
622                         {
623                           pos1_open = false;
624                           pos2_open_not = true;
625                           continue;
626                         }
627                     }
628                   else if (c == '\\')
629                     {
630                       c = phase1_getc ();
631                       /* Backslash-newline is valid and ignored.  */
632                     }
633                   else if (c == '/')
634                     {
635                       if (brackets <= 0)
636                         break;
637                     }
638 
639                   pos0 = false;
640                   pos1_open = false;
641                   pos2_open_not = false;
642                 }
643 
644               tp->type = token_type_other;
645               prefer_division_over_regexp = false;
646               return;
647             }
648           /* FALLTHROUGH */
649 
650         default:
651           /* We could carefully recognize each of the 2 and 3 character
652              operators, but it is not necessary, as we only need to recognize
653              gettext invocations.  Don't bother.  */
654           tp->type = token_type_other;
655           prefer_division_over_regexp = false;
656           return;
657         }
658     }
659 }
660 
661 
662 /* ========================= Extracting strings.  ========================== */
663 
664 
665 /* Context lookup table.  */
666 static flag_context_list_table_ty *flag_context_list_table;
667 
668 
669 /* The file is broken into tokens.  Scan the token stream, looking for
670    a keyword, followed by a left paren, followed by a string.  When we
671    see this sequence, we have something to remember.  We assume we are
672    looking at a valid C or C++ program, and leave the complaints about
673    the grammar to the compiler.
674 
675      Normal handling: Look for
676        keyword ( ... msgid ... )
677      Plural handling: Look for
678        keyword ( ... msgid ... msgid_plural ... )
679 
680    We use recursion because the arguments before msgid or between msgid
681    and msgid_plural can contain subexpressions of the same form.  */
682 
683 
684 /* Extract messages until the next balanced closing parenthesis.
685    Extracted messages are added to MLP.
686    Return true upon eof, false upon closing parenthesis.  */
687 static bool
extract_parenthesized(message_list_ty * mlp,flag_context_ty outer_context,flag_context_list_iterator_ty context_iter,struct arglist_parser * argparser)688 extract_parenthesized (message_list_ty *mlp,
689                        flag_context_ty outer_context,
690                        flag_context_list_iterator_ty context_iter,
691                        struct arglist_parser *argparser)
692 {
693   /* Current argument number.  */
694   int arg = 1;
695   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
696   int state;
697   /* Parameters of the keyword just seen.  Defined only in state 1.  */
698   const struct callshapes *next_shapes = NULL;
699   /* Whether to implicitly assume the next tokens are arguments even without
700      a '('.  */
701   bool next_is_argument = false;
702   /* Context iterator that will be used if the next token is a '('.  */
703   flag_context_list_iterator_ty next_context_iter =
704     passthrough_context_list_iterator;
705   /* Current context.  */
706   flag_context_ty inner_context =
707     inherited_context (outer_context,
708                        flag_context_list_iterator_advance (&context_iter));
709 
710   /* Start state is 0.  */
711   state = 0;
712 
713   for (;;)
714     {
715       token_ty token;
716 
717       x_awk_lex (&token);
718 
719       if (next_is_argument && token.type != token_type_lparen)
720         {
721           /* An argument list starts, even though there is no '('.  */
722           context_iter = next_context_iter;
723           outer_context = inner_context;
724           inner_context =
725             inherited_context (outer_context,
726                                flag_context_list_iterator_advance (
727                                  &context_iter));
728         }
729 
730       switch (token.type)
731         {
732         case token_type_symbol:
733           {
734             void *keyword_value;
735 
736             if (hash_find_entry (&keywords, token.string, strlen (token.string),
737                                  &keyword_value)
738                 == 0)
739               {
740                 next_shapes = (const struct callshapes *) keyword_value;
741                 state = 1;
742               }
743             else
744               state = 0;
745           }
746           next_is_argument =
747             (strcmp (token.string, "print") == 0
748              || strcmp (token.string, "printf") == 0);
749           next_context_iter =
750             flag_context_list_iterator (
751               flag_context_list_table_lookup (
752                 flag_context_list_table,
753                 token.string, strlen (token.string)));
754           free (token.string);
755           continue;
756 
757         case token_type_lparen:
758           if (extract_parenthesized (mlp, inner_context, next_context_iter,
759                                      arglist_parser_alloc (mlp,
760                                                            state ? next_shapes : NULL)))
761             {
762               arglist_parser_done (argparser, arg);
763               return true;
764             }
765           next_is_argument = false;
766           next_context_iter = null_context_list_iterator;
767           state = 0;
768           continue;
769 
770         case token_type_rparen:
771           arglist_parser_done (argparser, arg);
772           return false;
773 
774         case token_type_comma:
775           arg++;
776           inner_context =
777             inherited_context (outer_context,
778                                flag_context_list_iterator_advance (
779                                  &context_iter));
780           next_is_argument = false;
781           next_context_iter = passthrough_context_list_iterator;
782           state = 0;
783           continue;
784 
785         case token_type_string:
786           {
787             lex_pos_ty pos;
788             pos.file_name = logical_file_name;
789             pos.line_number = token.line_number;
790 
791             if (extract_all)
792               remember_a_message (mlp, NULL, token.string, false, false,
793                                   inner_context, &pos,
794                                   NULL, savable_comment, false);
795             else
796               {
797                 mixed_string_ty *ms =
798                   mixed_string_alloc_simple (token.string, lc_string,
799                                              pos.file_name, pos.line_number);
800                 free (token.string);
801                 arglist_parser_remember (argparser, arg, ms,
802                                          inner_context,
803                                          pos.file_name, pos.line_number,
804                                          savable_comment, false);
805               }
806           }
807           next_is_argument = false;
808           next_context_iter = null_context_list_iterator;
809           state = 0;
810           continue;
811 
812         case token_type_i18nstring:
813           {
814             lex_pos_ty pos;
815             pos.file_name = logical_file_name;
816             pos.line_number = token.line_number;
817 
818             remember_a_message (mlp, NULL, token.string, false, false,
819                                 inner_context, &pos,
820                                 NULL, savable_comment, false);
821           }
822           next_is_argument = false;
823           next_context_iter = null_context_list_iterator;
824           state = 0;
825           continue;
826 
827         case token_type_semicolon:
828           /* An argument list ends, and a new statement begins.  */
829           /* FIXME: Should handle newline that acts as statement separator
830              in the same way.  */
831           /* FIXME: Instead of resetting outer_context here, it may be better
832              to recurse in the next_is_argument handling above, waiting for
833              the next semicolon or other statement terminator.  */
834           outer_context = null_context;
835           context_iter = null_context_list_iterator;
836           next_is_argument = false;
837           next_context_iter = passthrough_context_list_iterator;
838           inner_context =
839             inherited_context (outer_context,
840                                flag_context_list_iterator_advance (
841                                  &context_iter));
842           state = 0;
843           continue;
844 
845         case token_type_eof:
846           arglist_parser_done (argparser, arg);
847           return true;
848 
849         case token_type_other:
850           next_is_argument = false;
851           next_context_iter = null_context_list_iterator;
852           state = 0;
853           continue;
854 
855         default:
856           abort ();
857         }
858     }
859 }
860 
861 
862 void
extract_awk(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)863 extract_awk (FILE *f,
864              const char *real_filename, const char *logical_filename,
865              flag_context_list_table_ty *flag_table,
866              msgdomain_list_ty *mdlp)
867 {
868   message_list_ty *mlp = mdlp->item[0]->messages;
869 
870   fp = f;
871   real_file_name = real_filename;
872   logical_file_name = xstrdup (logical_filename);
873   line_number = 1;
874 
875   last_comment_line = -1;
876   last_non_comment_line = -1;
877 
878   prefer_division_over_regexp = false;
879 
880   flag_context_list_table = flag_table;
881 
882   init_keywords ();
883 
884   /* Eat tokens until eof is seen.  When extract_parenthesized returns
885      due to an unbalanced closing parenthesis, just restart it.  */
886   while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
887                                  arglist_parser_alloc (mlp, NULL)))
888     ;
889 
890   fp = NULL;
891   real_file_name = NULL;
892   logical_file_name = NULL;
893   line_number = 0;
894 }
895