• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* xgettext Emacs Lisp backend.
2    Copyright (C) 2001-2003, 2005-2009, 2018-2020 Free Software Foundation, Inc.
3 
4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include "config.h"
21 #endif
22 
23 /* Specification.  */
24 #include "x-elisp.h"
25 
26 #include <errno.h>
27 #include <stdbool.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 #include "message.h"
33 #include "xgettext.h"
34 #include "xg-pos.h"
35 #include "xg-mixed-string.h"
36 #include "xg-arglist-context.h"
37 #include "xg-arglist-callshape.h"
38 #include "xg-arglist-parser.h"
39 #include "xg-message.h"
40 #include "error.h"
41 #include "xalloc.h"
42 #include "mem-hash-map.h"
43 #include "c-ctype.h"
44 #include "gettext.h"
45 
46 #define _(s) gettext(s)
47 
48 
49 /* Summary of Emacs Lisp syntax:
50    - ';' starts a comment until end of line.
51    - '#@nn' starts a comment of nn bytes.
52    - Integers are constituted of an optional prefix (#b, #B for binary,
53      #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
54      an optional sign (+ or -), the digits, and an optional trailing dot.
55    - Characters are written as '?' followed by the character, possibly
56      with an escape sequence, for examples '?a', '?\n', '?\177'.
57    - Strings are delimited by double quotes. Backslash introduces an escape
58      sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
59      '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
60    - Symbols: can contain meta-characters if preceded by backslash.
61    - Uninterned symbols: written as #:SYMBOL.
62    - () delimit lists.
63    - [] delimit vectors.
64    The reader is implemented in emacs-21.1/src/lread.c.  */
65 
66 
67 /* ====================== Keyword set customization.  ====================== */
68 
69 /* If true extract all strings.  */
70 static bool extract_all = false;
71 
72 static hash_table keywords;
73 static bool default_keywords = true;
74 
75 
76 void
x_elisp_extract_all()77 x_elisp_extract_all ()
78 {
79   extract_all = true;
80 }
81 
82 
83 void
x_elisp_keyword(const char * name)84 x_elisp_keyword (const char *name)
85 {
86   if (name == NULL)
87     default_keywords = false;
88   else
89     {
90       const char *end;
91       struct callshape shape;
92       const char *colon;
93 
94       if (keywords.table == NULL)
95         hash_init (&keywords, 100);
96 
97       split_keywordspec (name, &end, &shape);
98 
99       /* The characters between name and end should form a valid Lisp
100          symbol.  */
101       colon = strchr (name, ':');
102       if (colon == NULL || colon >= end)
103         insert_keyword_callshape (&keywords, name, end - name, &shape);
104     }
105 }
106 
107 /* Finish initializing the keywords hash table.
108    Called after argument processing, before each file is processed.  */
109 static void
init_keywords()110 init_keywords ()
111 {
112   if (default_keywords)
113     {
114       /* When adding new keywords here, also update the documentation in
115          xgettext.texi!  */
116       x_elisp_keyword ("_");
117       default_keywords = false;
118     }
119 }
120 
121 void
init_flag_table_elisp()122 init_flag_table_elisp ()
123 {
124   xgettext_record_flag ("_:1:pass-elisp-format");
125   xgettext_record_flag ("format:1:elisp-format");
126 }
127 
128 
129 /* ======================== Reading of characters.  ======================== */
130 
131 /* The input file stream.  */
132 static FILE *fp;
133 
134 
135 /* Fetch the next character from the input file.  */
136 static int
do_getc()137 do_getc ()
138 {
139   int c = getc (fp);
140 
141   if (c == EOF)
142     {
143       if (ferror (fp))
144         error (EXIT_FAILURE, errno,
145                _("error while reading \"%s\""), real_file_name);
146     }
147   else if (c == '\n')
148    line_number++;
149 
150   return c;
151 }
152 
153 /* Put back the last fetched character, not EOF.  */
154 static void
do_ungetc(int c)155 do_ungetc (int c)
156 {
157   if (c == '\n')
158     line_number--;
159   ungetc (c, fp);
160 }
161 
162 
163 /* ========================== Reading of tokens.  ========================== */
164 
165 
166 /* A token consists of a sequence of characters.  */
167 struct token
168 {
169   int allocated;                /* number of allocated 'token_char's */
170   int charcount;                /* number of used 'token_char's */
171   char *chars;                  /* the token's constituents */
172 };
173 
174 /* Initialize a 'struct token'.  */
175 static inline void
init_token(struct token * tp)176 init_token (struct token *tp)
177 {
178   tp->allocated = 10;
179   tp->chars = XNMALLOC (tp->allocated, char);
180   tp->charcount = 0;
181 }
182 
183 /* Free the memory pointed to by a 'struct token'.  */
184 static inline void
free_token(struct token * tp)185 free_token (struct token *tp)
186 {
187   free (tp->chars);
188 }
189 
190 /* Ensure there is enough room in the token for one more character.  */
191 static inline void
grow_token(struct token * tp)192 grow_token (struct token *tp)
193 {
194   if (tp->charcount == tp->allocated)
195     {
196       tp->allocated *= 2;
197       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
198     }
199 }
200 
201 /* Test whether a token has integer syntax.  */
202 static inline bool
is_integer(const char * p)203 is_integer (const char *p)
204 {
205   /* NB: Yes, '+.' and '-.' both designate the integer 0.  */
206   const char *p_start = p;
207 
208   if (*p == '+' || *p == '-')
209     p++;
210   if (*p == '\0')
211     return false;
212   while (*p >= '0' && *p <= '9')
213     p++;
214   if (p > p_start && *p == '.')
215     p++;
216   return (*p == '\0');
217 }
218 
219 /* Test whether a token has float syntax.  */
220 static inline bool
is_float(const char * p)221 is_float (const char *p)
222 {
223   enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
224   int state;
225 
226   state = 0;
227   if (*p == '+' || *p == '-')
228     p++;
229   if (*p >= '0' && *p <= '9')
230     {
231       state |= LEAD_INT;
232       do
233         p++;
234       while (*p >= '0' && *p <= '9');
235     }
236   if (*p == '.')
237     {
238       state |= DOT_CHAR;
239       p++;
240     }
241   if (*p >= '0' && *p <= '9')
242     {
243       state |= TRAIL_INT;
244       do
245         p++;
246       while (*p >= '0' && *p <= '9');
247     }
248   if (*p == 'e' || *p == 'E')
249     {
250       state |= E_CHAR;
251       p++;
252       if (*p == '+' || *p == '-')
253         p++;
254       if (*p >= '0' && *p <= '9')
255         {
256           state |= EXP_INT;
257           do
258             p++;
259           while (*p >= '0' && *p <= '9');
260         }
261       else if (p[-1] == '+'
262                && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
263                    || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
264         {
265           state |= EXP_INT;
266           p += 3;
267         }
268     }
269   return (*p == '\0')
270          && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
271              || state == (DOT_CHAR | TRAIL_INT)
272              || state == (LEAD_INT | E_CHAR | EXP_INT)
273              || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
274              || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
275 }
276 
277 /* Read the next token.  'first' is the first character, which has already
278    been read.  Returns true for a symbol, false for a number.  */
279 static bool
read_token(struct token * tp,int first)280 read_token (struct token *tp, int first)
281 {
282   int c;
283   bool quoted = false;
284 
285   init_token (tp);
286 
287   c = first;
288 
289   for (;; c = do_getc ())
290     {
291       if (c == EOF)
292         break;
293       if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
294         break;
295       if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
296           || c == '[' || c == ']' || c == '#')
297         break;
298       if (c == '\\')
299         {
300           quoted = true;
301           c = do_getc ();
302           if (c == EOF)
303             /* Invalid, but be tolerant.  */
304             break;
305         }
306       grow_token (tp);
307       tp->chars[tp->charcount++] = c;
308     }
309   if (c != EOF)
310     do_ungetc (c);
311 
312   if (quoted)
313     return true; /* symbol */
314 
315   /* Add a NUL byte at the end, for is_integer and is_float.  */
316   grow_token (tp);
317   tp->chars[tp->charcount] = '\0';
318 
319   if (is_integer (tp->chars) || is_float (tp->chars))
320     return false; /* number */
321   else
322     return true; /* symbol */
323 }
324 
325 
326 /* ========================= Accumulating comments ========================= */
327 
328 
329 static char *buffer;
330 static size_t bufmax;
331 static size_t buflen;
332 
333 static inline void
comment_start()334 comment_start ()
335 {
336   buflen = 0;
337 }
338 
339 static inline void
comment_add(int c)340 comment_add (int c)
341 {
342   if (buflen >= bufmax)
343     {
344       bufmax = 2 * bufmax + 10;
345       buffer = xrealloc (buffer, bufmax);
346     }
347   buffer[buflen++] = c;
348 }
349 
350 static inline void
comment_line_end(size_t chars_to_remove)351 comment_line_end (size_t chars_to_remove)
352 {
353   buflen -= chars_to_remove;
354   while (buflen >= 1
355          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
356     --buflen;
357   if (chars_to_remove == 0 && buflen >= bufmax)
358     {
359       bufmax = 2 * bufmax + 10;
360       buffer = xrealloc (buffer, bufmax);
361     }
362   buffer[buflen] = '\0';
363   savable_comment_add (buffer);
364 }
365 
366 
367 /* These are for tracking whether comments count as immediately before
368    keyword.  */
369 static int last_comment_line;
370 static int last_non_comment_line;
371 
372 
373 /* ========================= Accumulating messages ========================= */
374 
375 
376 static message_list_ty *mlp;
377 
378 
379 /* ============== Reading of objects.  See CLHS 2 "Syntax".  ============== */
380 
381 
382 /* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
383    Other objects need not to be represented precisely.  */
384 enum object_type
385 {
386   t_symbol,     /* symbol */
387   t_string,     /* string */
388   t_other,      /* other kind of real object */
389   t_dot,        /* '.' pseudo object */
390   t_listclose,  /* ')' pseudo object */
391   t_vectorclose,/* ']' pseudo object */
392   t_eof         /* EOF marker */
393 };
394 
395 struct object
396 {
397   enum object_type type;
398   struct token *token;          /* for t_symbol and t_string */
399   int line_number_at_start;     /* for t_string */
400 };
401 
402 /* Free the memory pointed to by a 'struct object'.  */
403 static inline void
free_object(struct object * op)404 free_object (struct object *op)
405 {
406   if (op->type == t_symbol || op->type == t_string)
407     {
408       free_token (op->token);
409       free (op->token);
410     }
411 }
412 
413 /* Convert a t_symbol/t_string token to a char*.  */
414 static char *
string_of_object(const struct object * op)415 string_of_object (const struct object *op)
416 {
417   char *str;
418   int n;
419 
420   if (!(op->type == t_symbol || op->type == t_string))
421     abort ();
422   n = op->token->charcount;
423   str = XNMALLOC (n + 1, char);
424   memcpy (str, op->token->chars, n);
425   str[n] = '\0';
426   return str;
427 }
428 
429 /* Context lookup table.  */
430 static flag_context_list_table_ty *flag_context_list_table;
431 
432 /* Returns the character represented by an escape sequence.  */
433 #define IGNORABLE_ESCAPE (EOF - 1)
434 static int
do_getc_escaped(int c,bool in_string)435 do_getc_escaped (int c, bool in_string)
436 {
437   switch (c)
438     {
439     case 'a':
440       return '\a';
441     case 'b':
442       return '\b';
443     case 'd':
444       return 0x7F;
445     case 'e':
446       return 0x1B;
447     case 'f':
448       return '\f';
449     case 'n':
450       return '\n';
451     case 'r':
452       return '\r';
453     case 't':
454       return '\t';
455     case 'v':
456       return '\v';
457 
458     case '\n':
459       return IGNORABLE_ESCAPE;
460 
461     case ' ':
462       return (in_string ? IGNORABLE_ESCAPE : ' ');
463 
464     case 'M': /* meta */
465       c = do_getc ();
466       if (c == EOF)
467         return EOF;
468       if (c != '-')
469         /* Invalid input.  But be tolerant.  */
470         return c;
471       c = do_getc ();
472       if (c == EOF)
473         return EOF;
474       if (c == '\\')
475         {
476           c = do_getc ();
477           if (c == EOF)
478             return EOF;
479           c = do_getc_escaped (c, false);
480         }
481       return c | 0x80;
482 
483     case 'S': /* shift */
484       c = do_getc ();
485       if (c == EOF)
486         return EOF;
487       if (c != '-')
488         /* Invalid input.  But be tolerant.  */
489         return c;
490       c = do_getc ();
491       if (c == EOF)
492         return EOF;
493       if (c == '\\')
494         {
495           c = do_getc ();
496           if (c == EOF)
497             return EOF;
498           c = do_getc_escaped (c, false);
499         }
500       return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
501 
502     case 'H': /* hyper */
503     case 'A': /* alt */
504     case 's': /* super */
505       c = do_getc ();
506       if (c == EOF)
507         return EOF;
508       if (c != '-')
509         /* Invalid input.  But be tolerant.  */
510         return c;
511       c = do_getc ();
512       if (c == EOF)
513         return EOF;
514       if (c == '\\')
515         {
516           c = do_getc ();
517           if (c == EOF)
518             return EOF;
519           c = do_getc_escaped (c, false);
520         }
521       return c;
522 
523     case 'C': /* ctrl */
524       c = do_getc ();
525       if (c == EOF)
526         return EOF;
527       if (c != '-')
528         /* Invalid input.  But be tolerant.  */
529         return c;
530       /*FALLTHROUGH*/
531     case '^':
532       c = do_getc ();
533       if (c == EOF)
534         return EOF;
535       if (c == '\\')
536         {
537           c = do_getc ();
538           if (c == EOF)
539             return EOF;
540           c = do_getc_escaped (c, false);
541         }
542       if (c == '?')
543         return 0x7F;
544       if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
545         return c & 0x9F;
546       if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
547         return c & 0x9F;
548 #if 0 /* We cannot handle NUL bytes in strings.  */
549       if (c == ' ')
550         return 0x00;
551 #endif
552       return c;
553 
554     case '0': case '1': case '2': case '3': case '4':
555     case '5': case '6': case '7':
556       /* An octal escape, as in ANSI C.  */
557       {
558         int n = c - '0';
559 
560         c = do_getc ();
561         if (c != EOF)
562           {
563             if (c >= '0' && c <= '7')
564               {
565                 n = (n << 3) + (c - '0');
566                 c = do_getc ();
567                 if (c != EOF)
568                   {
569                     if (c >= '0' && c <= '7')
570                       n = (n << 3) + (c - '0');
571                     else
572                       do_ungetc (c);
573                   }
574               }
575             else
576               do_ungetc (c);
577           }
578         return (unsigned char) n;
579       }
580 
581     case 'x':
582       /* A hexadecimal escape, as in ANSI C.  */
583       {
584         int n = 0;
585 
586         for (;;)
587           {
588             c = do_getc ();
589             if (c == EOF)
590               break;
591             else if (c >= '0' && c <= '9')
592               n = (n << 4) + (c - '0');
593             else if (c >= 'A' && c <= 'F')
594               n = (n << 4) + (c - 'A' + 10);
595             else if (c >= 'a' && c <= 'f')
596               n = (n << 4) + (c - 'a' + 10);
597             else
598               {
599                 do_ungetc (c);
600                 break;
601               }
602           }
603         return (unsigned char) n;
604       }
605 
606     default:
607       /* Ignore Emacs multibyte character stuff.  All the strings we are
608          interested in are ASCII strings.  */
609       return c;
610     }
611 }
612 
613 /* Read the next object.
614    'first_in_list' and 'new_backquote_flag' are used for reading old
615    backquote syntax and new backquote syntax.  */
616 static void
read_object(struct object * op,bool first_in_list,bool new_backquote_flag,flag_context_ty outer_context)617 read_object (struct object *op, bool first_in_list, bool new_backquote_flag,
618              flag_context_ty outer_context)
619 {
620   for (;;)
621     {
622       int c;
623 
624       c = do_getc ();
625 
626       switch (c)
627         {
628         case EOF:
629           op->type = t_eof;
630           return;
631 
632         case '\n':
633           /* Comments assumed to be grouped with a message must immediately
634              precede it, with no non-whitespace token on a line between
635              both.  */
636           if (last_non_comment_line > last_comment_line)
637             savable_comment_reset ();
638           continue;
639 
640         case '(':
641           {
642             int arg = 0;                /* Current argument number.  */
643             flag_context_list_iterator_ty context_iter;
644             const struct callshapes *shapes = NULL;
645             struct arglist_parser *argparser = NULL;
646 
647             for (;; arg++)
648               {
649                 struct object inner;
650                 flag_context_ty inner_context;
651 
652                 if (arg == 0)
653                   inner_context = null_context;
654                 else
655                   inner_context =
656                     inherited_context (outer_context,
657                                        flag_context_list_iterator_advance (
658                                          &context_iter));
659 
660                 read_object (&inner, arg == 0, new_backquote_flag,
661                              inner_context);
662 
663                 /* Recognize end of list.  */
664                 if (inner.type == t_listclose)
665                   {
666                     op->type = t_other;
667                     /* Don't bother converting "()" to "NIL".  */
668                     last_non_comment_line = line_number;
669                     if (argparser != NULL)
670                       arglist_parser_done (argparser, arg);
671                     return;
672                   }
673 
674                 /* Dots are not allowed in every position. ']' is not allowed.
675                    But be tolerant.  */
676 
677                 /* EOF inside list is illegal.  But be tolerant.  */
678                 if (inner.type == t_eof)
679                   break;
680 
681                 if (arg == 0)
682                   {
683                     /* This is the function position.  */
684                     if (inner.type == t_symbol)
685                       {
686                         char *symbol_name = string_of_object (&inner);
687                         void *keyword_value;
688 
689                         if (hash_find_entry (&keywords,
690                                              symbol_name, strlen (symbol_name),
691                                              &keyword_value)
692                             == 0)
693                           shapes = (const struct callshapes *) keyword_value;
694 
695                         argparser = arglist_parser_alloc (mlp, shapes);
696 
697                         context_iter =
698                           flag_context_list_iterator (
699                             flag_context_list_table_lookup (
700                               flag_context_list_table,
701                               symbol_name, strlen (symbol_name)));
702 
703                         free (symbol_name);
704                       }
705                     else
706                       context_iter = null_context_list_iterator;
707                   }
708                 else
709                   {
710                     /* These are the argument positions.  */
711                     if (argparser != NULL && inner.type == t_string)
712                       {
713                         char *s = string_of_object (&inner);
714                         mixed_string_ty *ms =
715                           mixed_string_alloc_simple (s, lc_string,
716                                                      logical_file_name,
717                                                      inner.line_number_at_start);
718                         free (s);
719                         arglist_parser_remember (argparser, arg, ms,
720                                                  inner_context,
721                                                  logical_file_name,
722                                                  inner.line_number_at_start,
723                                                  savable_comment, false);
724                       }
725                   }
726 
727                 free_object (&inner);
728               }
729 
730             if (argparser != NULL)
731               arglist_parser_done (argparser, arg);
732           }
733           op->type = t_other;
734           last_non_comment_line = line_number;
735           return;
736 
737         case ')':
738           /* Tell the caller about the end of list.
739              Unmatched closing parenthesis is illegal.  But be tolerant.  */
740           op->type = t_listclose;
741           last_non_comment_line = line_number;
742           return;
743 
744         case '[':
745           {
746             for (;;)
747               {
748                 struct object inner;
749 
750                 read_object (&inner, false, new_backquote_flag, null_context);
751 
752                 /* Recognize end of vector.  */
753                 if (inner.type == t_vectorclose)
754                   {
755                     op->type = t_other;
756                     last_non_comment_line = line_number;
757                     return;
758                   }
759 
760                 /* Dots and ')' are not allowed.  But be tolerant.  */
761 
762                 /* EOF inside vector is illegal.  But be tolerant.  */
763                 if (inner.type == t_eof)
764                   break;
765 
766                 free_object (&inner);
767               }
768           }
769           op->type = t_other;
770           last_non_comment_line = line_number;
771           return;
772 
773         case ']':
774           /* Tell the caller about the end of vector.
775              Unmatched closing bracket is illegal.  But be tolerant.  */
776           op->type = t_vectorclose;
777           last_non_comment_line = line_number;
778           return;
779 
780         case '\'':
781           {
782             struct object inner;
783 
784             read_object (&inner, false, new_backquote_flag, null_context);
785 
786             /* Dots and EOF are not allowed here.  But be tolerant.  */
787 
788             free_object (&inner);
789 
790             op->type = t_other;
791             last_non_comment_line = line_number;
792             return;
793           }
794 
795         case '`':
796           if (first_in_list)
797             goto default_label;
798           {
799             struct object inner;
800 
801             read_object (&inner, false, true, null_context);
802 
803             /* Dots and EOF are not allowed here.  But be tolerant.  */
804 
805             free_object (&inner);
806 
807             op->type = t_other;
808             last_non_comment_line = line_number;
809             return;
810           }
811 
812         case ',':
813           if (!new_backquote_flag)
814             goto default_label;
815           {
816             int c = do_getc ();
817             /* The ,@ handling inside lists is wrong anyway, because
818                ,@form expands to an unknown number of elements.  */
819             if (c != EOF && c != '@' && c != '.')
820               do_ungetc (c);
821           }
822           {
823             struct object inner;
824 
825             read_object (&inner, false, false, null_context);
826 
827             /* Dots and EOF are not allowed here.  But be tolerant.  */
828 
829             free_object (&inner);
830 
831             op->type = t_other;
832             last_non_comment_line = line_number;
833             return;
834           }
835 
836         case ';':
837           {
838             bool all_semicolons = true;
839 
840             last_comment_line = line_number;
841             comment_start ();
842             for (;;)
843               {
844                 int c = do_getc ();
845                 if (c == EOF || c == '\n')
846                   break;
847                 if (c != ';')
848                   all_semicolons = false;
849                 if (!all_semicolons)
850                   {
851                     /* We skip all leading white space, but not EOLs.  */
852                     if (!(buflen == 0 && (c == ' ' || c == '\t')))
853                       comment_add (c);
854                   }
855               }
856             comment_line_end (0);
857             continue;
858           }
859 
860         case '"':
861           {
862             op->token = XMALLOC (struct token);
863             init_token (op->token);
864             op->line_number_at_start = line_number;
865             for (;;)
866               {
867                 int c = do_getc ();
868                 if (c == EOF)
869                   /* Invalid input.  Be tolerant, no error message.  */
870                   break;
871                 if (c == '"')
872                   break;
873                 if (c == '\\')
874                   {
875                     c = do_getc ();
876                     if (c == EOF)
877                       /* Invalid input.  Be tolerant, no error message.  */
878                       break;
879                     c = do_getc_escaped (c, true);
880                     if (c == EOF)
881                       /* Invalid input.  Be tolerant, no error message.  */
882                       break;
883                     if (c == IGNORABLE_ESCAPE)
884                       /* Ignore escaped newline and escaped space.  */
885                       ;
886                     else
887                       {
888                         grow_token (op->token);
889                         op->token->chars[op->token->charcount++] = c;
890                       }
891                   }
892                 else
893                   {
894                     grow_token (op->token);
895                     op->token->chars[op->token->charcount++] = c;
896                   }
897               }
898             op->type = t_string;
899 
900             if (extract_all)
901               {
902                 lex_pos_ty pos;
903 
904                 pos.file_name = logical_file_name;
905                 pos.line_number = op->line_number_at_start;
906                 remember_a_message (mlp, NULL, string_of_object (op), false,
907                                     false, null_context, &pos,
908                                     NULL, savable_comment, false);
909               }
910             last_non_comment_line = line_number;
911             return;
912           }
913 
914         case '?':
915           c = do_getc ();
916           if (c == EOF)
917             /* Invalid input.  Be tolerant, no error message.  */
918             ;
919           else if (c == '\\')
920             {
921               c = do_getc ();
922               if (c == EOF)
923                 /* Invalid input.  Be tolerant, no error message.  */
924                 ;
925               else
926                 {
927                   c = do_getc_escaped (c, false);
928                   if (c == EOF)
929                     /* Invalid input.  Be tolerant, no error message.  */
930                     ;
931                 }
932             }
933           /* Impossible to deal with Emacs multibyte character stuff here.  */
934           op->type = t_other;
935           last_non_comment_line = line_number;
936           return;
937 
938         case '#':
939           /* Dispatch macro handling.  */
940           c = do_getc ();
941           if (c == EOF)
942             /* Invalid input.  Be tolerant, no error message.  */
943             {
944               op->type = t_other;
945               return;
946             }
947 
948           switch (c)
949             {
950             case '^':
951               c = do_getc ();
952               if (c == '^')
953                 c = do_getc ();
954               if (c == '[')
955                 {
956                   /* Read a char table, same syntax as a vector.  */
957                   for (;;)
958                     {
959                       struct object inner;
960 
961                       read_object (&inner, false, new_backquote_flag,
962                                    null_context);
963 
964                       /* Recognize end of vector.  */
965                       if (inner.type == t_vectorclose)
966                         {
967                           op->type = t_other;
968                           last_non_comment_line = line_number;
969                           return;
970                         }
971 
972                       /* Dots and ')' are not allowed.  But be tolerant.  */
973 
974                       /* EOF inside vector is illegal.  But be tolerant.  */
975                       if (inner.type == t_eof)
976                         break;
977 
978                       free_object (&inner);
979                     }
980                   op->type = t_other;
981                   last_non_comment_line = line_number;
982                   return;
983                 }
984               else
985                 /* Invalid input.  Be tolerant, no error message.  */
986                 {
987                   op->type = t_other;
988                   if (c != EOF)
989                     last_non_comment_line = line_number;
990                   return;
991                 }
992 
993             case '&':
994               /* Read a bit vector.  */
995               {
996                 struct object length;
997                 read_object (&length, first_in_list, new_backquote_flag,
998                              null_context);
999                 /* Dots and EOF are not allowed here.
1000                    But be tolerant.  */
1001                 free_object (&length);
1002               }
1003               c = do_getc ();
1004               if (c == '"')
1005                 {
1006                   struct object string;
1007                   read_object (&string, first_in_list, new_backquote_flag,
1008                                null_context);
1009                   free_object (&string);
1010                 }
1011               else
1012                 /* Invalid input.  Be tolerant, no error message.  */
1013                 do_ungetc (c);
1014               op->type = t_other;
1015               last_non_comment_line = line_number;
1016               return;
1017 
1018             case '[':
1019               /* Read a compiled function, same syntax as a vector.  */
1020             case '(':
1021               /* Read a string with properties, same syntax as a list.  */
1022               {
1023                 struct object inner;
1024                 do_ungetc (c);
1025                 read_object (&inner, false, new_backquote_flag, null_context);
1026                 /* Dots and EOF are not allowed here.
1027                    But be tolerant.  */
1028                 free_object (&inner);
1029                 op->type = t_other;
1030                 last_non_comment_line = line_number;
1031                 return;
1032               }
1033 
1034             case '@':
1035               /* Read a comment consisting of a given number of bytes.  */
1036               {
1037                 unsigned int nskip = 0;
1038 
1039                 for (;;)
1040                   {
1041                     c = do_getc ();
1042                     if (!(c >= '0' && c <= '9'))
1043                       break;
1044                     nskip = 10 * nskip + (c - '0');
1045                   }
1046                 if (c != EOF)
1047                   {
1048                     do_ungetc (c);
1049                     for (; nskip > 0; nskip--)
1050                       if (do_getc () == EOF)
1051                         break;
1052                   }
1053                 continue;
1054               }
1055 
1056             case '$':
1057               op->type = t_other;
1058               last_non_comment_line = line_number;
1059               return;
1060 
1061             case '\'':
1062             case ':':
1063             case 'S': case 's': /* XEmacs only */
1064               {
1065                 struct object inner;
1066                 read_object (&inner, false, new_backquote_flag, null_context);
1067                 /* Dots and EOF are not allowed here.
1068                    But be tolerant.  */
1069                 free_object (&inner);
1070                 op->type = t_other;
1071                 last_non_comment_line = line_number;
1072                 return;
1073               }
1074 
1075             case '0': case '1': case '2': case '3': case '4':
1076             case '5': case '6': case '7': case '8': case '9':
1077               /* Read Common Lisp style #n# or #n=.  */
1078               for (;;)
1079                 {
1080                   c = do_getc ();
1081                   if (!(c >= '0' && c <= '9'))
1082                     break;
1083                 }
1084               if (c == EOF)
1085                 /* Invalid input.  Be tolerant, no error message.  */
1086                 {
1087                   op->type = t_other;
1088                   return;
1089                 }
1090               if (c == '=')
1091                 {
1092                   read_object (op, false, new_backquote_flag, outer_context);
1093                   last_non_comment_line = line_number;
1094                   return;
1095                 }
1096               if (c == '#')
1097                 {
1098                   op->type = t_other;
1099                   last_non_comment_line = line_number;
1100                   return;
1101                 }
1102               if (c == 'R' || c == 'r')
1103                 {
1104                   /* Read an integer.  */
1105                   c = do_getc ();
1106                   if (c == '+' || c == '-')
1107                     c = do_getc ();
1108                   for (; c != EOF; c = do_getc ())
1109                     if (!c_isalnum (c))
1110                       {
1111                         do_ungetc (c);
1112                         break;
1113                       }
1114                   op->type = t_other;
1115                   last_non_comment_line = line_number;
1116                   return;
1117                 }
1118               /* Invalid input.  Be tolerant, no error message.  */
1119               op->type = t_other;
1120               last_non_comment_line = line_number;
1121               return;
1122 
1123             case 'X': case 'x':
1124             case 'O': case 'o':
1125             case 'B': case 'b':
1126               {
1127                 /* Read an integer.  */
1128                 c = do_getc ();
1129                 if (c == '+' || c == '-')
1130                   c = do_getc ();
1131                 for (; c != EOF; c = do_getc ())
1132                   if (!c_isalnum (c))
1133                     {
1134                       do_ungetc (c);
1135                       break;
1136                     }
1137                 op->type = t_other;
1138                 last_non_comment_line = line_number;
1139                 return;
1140               }
1141 
1142             case '*': /* XEmacs only */
1143               {
1144                 /* Read a bit-vector.  */
1145                 do
1146                   c = do_getc ();
1147                 while (c == '0' || c == '1');
1148                 if (c != EOF)
1149                   do_ungetc (c);
1150                 op->type = t_other;
1151                 last_non_comment_line = line_number;
1152                 return;
1153               }
1154 
1155             case '+': /* XEmacs only */
1156             case '-': /* XEmacs only */
1157               /* Simply assume every feature expression is true.  */
1158               {
1159                 struct object inner;
1160                 read_object (&inner, false, new_backquote_flag, null_context);
1161                 /* Dots and EOF are not allowed here.
1162                    But be tolerant.  */
1163                 free_object (&inner);
1164                 continue;
1165               }
1166 
1167             default:
1168               /* Invalid input.  Be tolerant, no error message.  */
1169               op->type = t_other;
1170               last_non_comment_line = line_number;
1171               return;
1172             }
1173 
1174           /*NOTREACHED*/
1175           abort ();
1176 
1177         case '.':
1178           c = do_getc ();
1179           if (c != EOF)
1180             {
1181               do_ungetc (c);
1182               if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
1183                   || strchr ("\"'`,(", c) != NULL)
1184                 {
1185                   op->type = t_dot;
1186                   last_non_comment_line = line_number;
1187                   return;
1188                 }
1189             }
1190           c = '.';
1191           /*FALLTHROUGH*/
1192         default:
1193         default_label:
1194           if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
1195             continue;
1196           /* Read a token.  */
1197           {
1198             bool symbol;
1199 
1200             op->token = XMALLOC (struct token);
1201             symbol = read_token (op->token, c);
1202             if (symbol)
1203               {
1204                 op->type = t_symbol;
1205                 last_non_comment_line = line_number;
1206                 return;
1207               }
1208             else
1209               {
1210                 free_token (op->token);
1211                 free (op->token);
1212                 op->type = t_other;
1213                 last_non_comment_line = line_number;
1214                 return;
1215               }
1216           }
1217         }
1218     }
1219 }
1220 
1221 
1222 void
extract_elisp(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)1223 extract_elisp (FILE *f,
1224                const char *real_filename, const char *logical_filename,
1225                flag_context_list_table_ty *flag_table,
1226                msgdomain_list_ty *mdlp)
1227 {
1228   mlp = mdlp->item[0]->messages;
1229 
1230   fp = f;
1231   real_file_name = real_filename;
1232   logical_file_name = xstrdup (logical_filename);
1233   line_number = 1;
1234 
1235   last_comment_line = -1;
1236   last_non_comment_line = -1;
1237 
1238   flag_context_list_table = flag_table;
1239 
1240   init_keywords ();
1241 
1242   /* Eat tokens until eof is seen.  When read_object returns
1243      due to an unbalanced closing parenthesis, just restart it.  */
1244   do
1245     {
1246       struct object toplevel_object;
1247 
1248       read_object (&toplevel_object, false, false, null_context);
1249 
1250       if (toplevel_object.type == t_eof)
1251         break;
1252 
1253       free_object (&toplevel_object);
1254     }
1255   while (!feof (fp));
1256 
1257   /* Close scanner.  */
1258   fp = NULL;
1259   real_file_name = NULL;
1260   logical_file_name = NULL;
1261   line_number = 0;
1262 }
1263