1 /* xgettext Java backend.
2 Copyright (C) 2003, 2005-2009, 2018-2020 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include "config.h"
20 #endif
21
22 /* Specification. */
23 #include "x-java.h"
24
25 #include <errno.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #include "message.h"
32 #include "rc-str-list.h"
33 #include "xgettext.h"
34 #include "xg-pos.h"
35 #include "xg-encoding.h"
36 #include "xg-mixed-string.h"
37 #include "xg-arglist-context.h"
38 #include "xg-arglist-callshape.h"
39 #include "xg-arglist-parser.h"
40 #include "xg-message.h"
41 #include "error.h"
42 #include "error-progname.h"
43 #include "xalloc.h"
44 #include "mem-hash-map.h"
45 #include "po-charset.h"
46 #include "unistr.h"
47 #include "unictype.h"
48 #include "gettext.h"
49
/* Shorthand for looking up the translation of a message.  */
#define _(msgid) gettext (msgid)

/* Number of elements of the array ARR (which must be a true array, not a
   pointer).  */
#define SIZEOF(arr) (sizeof (arr) / sizeof ((arr)[0]))
53
54
55 /* The Java syntax is defined in the
56 Java Language Specification
57 (available from https://docs.oracle.com/javase/specs/),
58 chapter 3 "Lexical Structure". */
59
60
61 /* ====================== Keyword set customization. ====================== */
62
63 /* If true extract all strings. */
64 static bool extract_all = false;
65
66 static hash_table keywords;
67 static bool default_keywords = true;
68
69
70 void
x_java_extract_all()71 x_java_extract_all ()
72 {
73 extract_all = true;
74 }
75
76
77 void
x_java_keyword(const char * name)78 x_java_keyword (const char *name)
79 {
80 if (name == NULL)
81 default_keywords = false;
82 else
83 {
84 const char *end;
85 struct callshape shape;
86 const char *colon;
87
88 if (keywords.table == NULL)
89 hash_init (&keywords, 100);
90
91 split_keywordspec (name, &end, &shape);
92
93 /* The characters between name and end should form a valid Java
94 identifier sequence with dots.
95 A colon means an invalid parse in split_keywordspec(). */
96 colon = strchr (name, ':');
97 if (colon == NULL || colon >= end)
98 insert_keyword_callshape (&keywords, name, end - name, &shape);
99 }
100 }
101
102 /* Finish initializing the keywords hash table.
103 Called after argument processing, before each file is processed. */
104 static void
init_keywords()105 init_keywords ()
106 {
107 if (default_keywords)
108 {
109 /* When adding new keywords here, also update the documentation in
110 xgettext.texi! */
111 x_java_keyword ("GettextResource.gettext:2"); /* static method */
112 x_java_keyword ("GettextResource.ngettext:2,3"); /* static method */
113 x_java_keyword ("GettextResource.pgettext:2c,3"); /* static method */
114 x_java_keyword ("GettextResource.npgettext:2c,3,4"); /* static method */
115 x_java_keyword ("gettext");
116 x_java_keyword ("ngettext:1,2");
117 x_java_keyword ("pgettext:1c,2");
118 x_java_keyword ("npgettext:1c,2,3");
119 x_java_keyword ("getString"); /* ResourceBundle.getString */
120 default_keywords = false;
121 }
122 }
123
/* Record the format string flags of the functions known to the Java
   backend, through xgettext_record_flag.  */
void
init_flag_table_java ()
{
  /* Record, for the argument designated by SPEC ("function:argnum"), first
     the pass-java-format flag and then the pass-java-printf-format flag.  */
#define RECORD_PASS_FLAGS(spec) \
  do                                                            \
    {                                                           \
      xgettext_record_flag (spec ":pass-java-format");          \
      xgettext_record_flag (spec ":pass-java-printf-format");   \
    }                                                           \
  while (0)

  RECORD_PASS_FLAGS ("GettextResource.gettext:2");
  RECORD_PASS_FLAGS ("GettextResource.ngettext:2");
  RECORD_PASS_FLAGS ("GettextResource.ngettext:3");
  RECORD_PASS_FLAGS ("GettextResource.pgettext:3");
  RECORD_PASS_FLAGS ("GettextResource.npgettext:3");
  RECORD_PASS_FLAGS ("GettextResource.npgettext:4");
  RECORD_PASS_FLAGS ("gettext:1");
  RECORD_PASS_FLAGS ("ngettext:1");
  RECORD_PASS_FLAGS ("ngettext:2");
  RECORD_PASS_FLAGS ("pgettext:2");
  RECORD_PASS_FLAGS ("npgettext:2");
  RECORD_PASS_FLAGS ("npgettext:3");
  RECORD_PASS_FLAGS ("getString:1");

#undef RECORD_PASS_FLAGS

  /* Functions whose first argument is itself a format string.  */
  xgettext_record_flag ("MessageFormat:1:java-format");
  xgettext_record_flag ("MessageFormat.format:1:java-format");
  xgettext_record_flag ("String.format:1:java-printf-format");
  xgettext_record_flag ("printf:1:java-printf-format"); /* PrintStream.printf */
}
158
159
160 /* ======================== Reading of characters. ======================== */
161
/* The stream the current input file is read from.  */
static FILE *fp;


/* Phase 1: fetch the next single-byte character from the input file.
   The pushback can hold an unlimited number of 'u' characters followed by
   up to 4 other characters.  */

/* A run of 'u' characters occupies a single pushback slot:
   MULTIPLE_U (count) stands for COUNT consecutive 'u's.  */
#define MULTIPLE_U(count) ((count) + 0x1000)

/* The pushback stack of phase 1 and the number of slots in use.  */
static unsigned int phase1_pushback_length;
static int phase1_pushback[5];
175
176 static int
phase1_getc()177 phase1_getc ()
178 {
179 int c;
180
181 if (phase1_pushback_length)
182 {
183 c = phase1_pushback[--phase1_pushback_length];
184 if (c >= MULTIPLE_U (0))
185 {
186 if (c > MULTIPLE_U (1))
187 phase1_pushback[phase1_pushback_length++] = c - 1;
188 return 'u';
189 }
190 else
191 return c;
192 }
193
194 c = getc (fp);
195
196 if (c == EOF)
197 {
198 if (ferror (fp))
199 error (EXIT_FAILURE, errno,
200 _("error while reading \"%s\""), real_file_name);
201 }
202
203 return c;
204 }
205
206 /* Supports any number of 'u' and up to 4 arbitrary characters of pushback. */
207 static void
phase1_ungetc(int c)208 phase1_ungetc (int c)
209 {
210 if (c != EOF)
211 {
212 if (c == 'u')
213 {
214 if (phase1_pushback_length > 0
215 && phase1_pushback[phase1_pushback_length - 1] >= MULTIPLE_U (0))
216 phase1_pushback[phase1_pushback_length - 1]++;
217 else
218 {
219 if (phase1_pushback_length == SIZEOF (phase1_pushback))
220 abort ();
221 phase1_pushback[phase1_pushback_length++] = MULTIPLE_U (1);
222 }
223 }
224 else
225 {
226 if (phase1_pushback_length == SIZEOF (phase1_pushback))
227 abort ();
228 phase1_pushback[phase1_pushback_length++] = c;
229 }
230 }
231 }
232
233
/* Phase 2: fetch the next single-byte character or Unicode character from
   the file.
   (Here, as in the Java Language Specification, "Unicode character" really
   means "UTF-16 encoding unit".)  */

/* What phases 2, 3, 4 return when the end of the input is reached.  */
#define P2_EOF 0xffff

/* Wrap the UTF-16 code unit CODE into a return value that cannot be confused
   with the return value for a single byte.  */
#define UNICODE(code) ((code) + 0x10000)

/* Tell whether a return value of phase 2, 3, 4 denotes a UTF-16 code unit.  */
#define IS_UNICODE(p2_result) (0x10000 <= (p2_result))

/* Unwrap the UTF-16 code unit from a return value for which IS_UNICODE
   holds.  */
#define UTF16_VALUE(p2_result) ((p2_result) - 0x10000)

/* Strip the UNICODE marker from a return value of phase 2, 3, 4, which
   makes comparisons against ASCII characters simpler:
   (RED (c) == 'x') is equivalent to (c == 'x' || c == UNICODE ('x')).  */
#define RED(p2_result) ((p2_result) & 0xffff)

/* The pushback stack of phase 2 and the number of slots in use.  */
static int phase2_pushback_length;
static int phase2_pushback[1];
259
260 static int
phase2_getc()261 phase2_getc ()
262 {
263 int c;
264
265 if (phase2_pushback_length)
266 return phase2_pushback[--phase2_pushback_length];
267
268 c = phase1_getc ();
269 if (c == EOF)
270 return P2_EOF;
271 if (c == '\\')
272 {
273 c = phase1_getc ();
274 if (c == 'u')
275 {
276 unsigned int u_count = 1;
277 unsigned char buf[4];
278 unsigned int n;
279 int i;
280
281 for (;;)
282 {
283 c = phase1_getc ();
284 if (c != 'u')
285 break;
286 u_count++;
287 }
288 phase1_ungetc (c);
289
290 n = 0;
291 for (i = 0; i < 4; i++)
292 {
293 c = phase1_getc ();
294
295 if (c >= '0' && c <= '9')
296 n = (n << 4) + (c - '0');
297 else if (c >= 'A' && c <= 'F')
298 n = (n << 4) + (c - 'A' + 10);
299 else if (c >= 'a' && c <= 'f')
300 n = (n << 4) + (c - 'a' + 10);
301 else
302 {
303 phase1_ungetc (c);
304 while (--i >= 0)
305 phase1_ungetc (buf[i]);
306 for (; u_count > 0; u_count--)
307 phase1_ungetc ('u');
308 return '\\';
309 }
310
311 buf[i] = c;
312 }
313 return UNICODE (n);
314 }
315 phase1_ungetc (c);
316 return '\\';
317 }
318 return c;
319 }
320
321 /* Supports only one pushback character. */
322 static void
phase2_ungetc(int c)323 phase2_ungetc (int c)
324 {
325 if (c != P2_EOF)
326 {
327 if (phase2_pushback_length == SIZEOF (phase2_pushback))
328 abort ();
329 phase2_pushback[phase2_pushback_length++] = c;
330 }
331 }
332
333
/* Phase 3: fetch the next single-byte character or Unicode character from
   the file, keeping line_number up to date.
   Line terminators are converted to '\n' or UNICODE ('\n').  */

/* The pushback stack of phase 3 and the number of slots in use.  */
static int phase3_pushback_length;
static int phase3_pushback[2];
340
341 static int
phase3_getc()342 phase3_getc ()
343 {
344 int c;
345
346 if (phase3_pushback_length)
347 {
348 c = phase3_pushback[--phase3_pushback_length];
349 if (c == '\n')
350 ++line_number;
351 return c;
352 }
353
354 c = phase2_getc ();
355
356 /* Handle line terminators. */
357 if (RED (c) == '\r')
358 {
359 int c1 = phase2_getc ();
360
361 if (RED (c1) != '\n')
362 phase2_ungetc (c1);
363
364 /* Seen line terminator CR or CR/LF. */
365 if (c == '\r' || c1 == '\n')
366 {
367 ++line_number;
368 return '\n';
369 }
370 else
371 return UNICODE ('\n');
372 }
373 else if (RED (c) == '\n')
374 {
375 /* Seen line terminator LF. */
376 if (c == '\n')
377 {
378 ++line_number;
379 return '\n';
380 }
381 else
382 return UNICODE ('\n');
383 }
384
385 return c;
386 }
387
388 /* Supports 2 characters of pushback. */
389 static void
phase3_ungetc(int c)390 phase3_ungetc (int c)
391 {
392 if (c != P2_EOF)
393 {
394 if (c == '\n')
395 --line_number;
396 if (phase3_pushback_length == SIZEOF (phase3_pushback))
397 abort ();
398 phase3_pushback[phase3_pushback_length++] = c;
399 }
400 }
401
402
403 /* ========================= Accumulating strings. ======================== */
404
405 /* See xg-mixed-string.h for the main API. */
406
/* Append the phase 2/3/4 result C to the buffer BP: as a Unicode character
   if C carries the UNICODE marker, as a single byte otherwise.  */
static void
mixed_string_buffer_append (struct mixed_string_buffer *bp, int c)
{
  if (!IS_UNICODE (c))
    {
      /* A plain byte.  */
      mixed_string_buffer_append_char (bp, (unsigned char) c);
      return;
    }

  /* A UTF-16 code unit.  */
  mixed_string_buffer_append_unicode (bp, UTF16_VALUE (c));
}
422
423
424 /* ======================== Accumulating comments. ======================== */
425
426
427 /* Accumulating a single comment line. */
428
429 static struct mixed_string_buffer comment_buffer;
430
431 static inline void
comment_start()432 comment_start ()
433 {
434 mixed_string_buffer_init (&comment_buffer, lc_comment,
435 logical_file_name, line_number);
436 }
437
438 static inline bool
comment_at_start()439 comment_at_start ()
440 {
441 return mixed_string_buffer_is_empty (&comment_buffer);
442 }
443
444 static inline void
comment_add(int c)445 comment_add (int c)
446 {
447 mixed_string_buffer_append (&comment_buffer, c);
448 }
449
450 static inline void
comment_line_end(size_t chars_to_remove)451 comment_line_end (size_t chars_to_remove)
452 {
453 char *buffer =
454 mixed_string_contents_free1 (mixed_string_buffer_result (&comment_buffer));
455 size_t buflen = strlen (buffer);
456
457 buflen -= chars_to_remove;
458 while (buflen >= 1
459 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
460 --buflen;
461 buffer[buflen] = '\0';
462 savable_comment_add (buffer);
463 }
464
465
/* Line numbers used to decide whether a comment counts as being immediately
   before a keyword: the line of the most recent comment and the line of the
   most recent token that is not a comment.  */
static int last_non_comment_line;
static int last_comment_line;
470
471
472 /* Replace each comment that is not inside a character constant or string
473 literal with a space or newline character. */
474
475 static int
phase4_getc()476 phase4_getc ()
477 {
478 int c0;
479 int c;
480 bool last_was_star;
481
482 c0 = phase3_getc ();
483 if (RED (c0) != '/')
484 return c0;
485 c = phase3_getc ();
486 switch (RED (c))
487 {
488 default:
489 phase3_ungetc (c);
490 return c0;
491
492 case '*':
493 /* C style comment. */
494 comment_start ();
495 last_was_star = false;
496 for (;;)
497 {
498 c = phase3_getc ();
499 if (c == P2_EOF)
500 break;
501 /* We skip all leading white space, but not EOLs. */
502 if (!(comment_at_start () && (RED (c) == ' ' || RED (c) == '\t')))
503 comment_add (c);
504 switch (RED (c))
505 {
506 case '\n':
507 comment_line_end (1);
508 comment_start ();
509 last_was_star = false;
510 continue;
511
512 case '*':
513 last_was_star = true;
514 continue;
515
516 case '/':
517 if (last_was_star)
518 {
519 comment_line_end (2);
520 break;
521 }
522 /* FALLTHROUGH */
523
524 default:
525 last_was_star = false;
526 continue;
527 }
528 break;
529 }
530 last_comment_line = line_number;
531 return ' ';
532
533 case '/':
534 /* C++ style comment. */
535 last_comment_line = line_number;
536 comment_start ();
537 for (;;)
538 {
539 c = phase3_getc ();
540 if (RED (c) == '\n' || c == P2_EOF)
541 break;
542 /* We skip all leading white space, but not EOLs. */
543 if (!(comment_at_start () && (RED (c) == ' ' || RED (c) == '\t')))
544 comment_add (c);
545 }
546 phase3_ungetc (c); /* push back the newline, to decrement line_number */
547 comment_line_end (0);
548 phase3_getc (); /* read the newline again */
549 return '\n';
550 }
551 }
552
/* Push the phase 4 result C back.  This simply forwards to the pushback of
   phase 3; only one character of pushback is supported here.  */
static void
phase4_ungetc (int c)
{
  phase3_ungetc (c);
  return;
}
559
560
561 /* ========================== Reading of tokens. ========================== */
562
563 enum token_type_ty
564 {
565 token_type_eof,
566 token_type_lparen, /* ( */
567 token_type_rparen, /* ) */
568 token_type_lbrace, /* { */
569 token_type_rbrace, /* } */
570 token_type_comma, /* , */
571 token_type_dot, /* . */
572 token_type_string_literal, /* "abc", """text block""" */
573 token_type_number, /* 1.23 */
574 token_type_symbol, /* identifier, keyword, null */
575 token_type_plus, /* + */
576 token_type_other /* character literal, misc. operator */
577 };
578 typedef enum token_type_ty token_type_ty;
579
580 typedef struct token_ty token_ty;
581 struct token_ty
582 {
583 token_type_ty type;
584 char *string; /* for token_type_symbol */
585 mixed_string_ty *mixed_string; /* for token_type_string_literal */
586 refcounted_string_list_ty *comment; /* for token_type_string_literal */
587 int line_number;
588 };
589
590
591 /* Free the memory pointed to by a 'struct token_ty'. */
592 static inline void
free_token(token_ty * tp)593 free_token (token_ty *tp)
594 {
595 if (tp->type == token_type_symbol)
596 free (tp->string);
597 if (tp->type == token_type_string_literal)
598 {
599 free (tp->mixed_string);
600 drop_reference (tp->comment);
601 }
602 }
603
604
605 /* Read an escape sequence inside a string literal or character literal. */
606 static inline int
do_getc_escaped()607 do_getc_escaped ()
608 {
609 int c;
610
611 /* Use phase 3, because phase 4 elides comments. */
612 c = phase3_getc ();
613 if (c == P2_EOF)
614 return UNICODE ('\\');
615 switch (RED (c))
616 {
617 case 'b':
618 return UNICODE (0x08);
619 case 't':
620 return UNICODE (0x09);
621 case 'n':
622 return UNICODE (0x0a);
623 case 'f':
624 return UNICODE (0x0c);
625 case 'r':
626 return UNICODE (0x0d);
627 case '"':
628 return UNICODE ('"');
629 case '\'':
630 return UNICODE ('\'');
631 case '\\':
632 return UNICODE ('\\');
633 case '0': case '1': case '2': case '3':
634 case '4': case '5': case '6': case '7':
635 {
636 int n = RED (c) - '0';
637 bool maybe3digits = (n < 4);
638
639 c = phase3_getc ();
640 if (RED (c) >= '0' && RED (c) <= '7')
641 {
642 n = (n << 3) + (RED (c) - '0');
643 if (maybe3digits)
644 {
645 c = phase3_getc ();
646 if (RED (c) >= '0' && RED (c) <= '7')
647 n = (n << 3) + (RED (c) - '0');
648 else
649 phase3_ungetc (c);
650 }
651 }
652 else
653 phase3_ungetc (c);
654
655 return UNICODE (n);
656 }
657 default:
658 /* Invalid escape sequence. */
659 phase3_ungetc (c);
660 return UNICODE ('\\');
661 }
662 }
663
664 /* Read a string literal or character literal. */
665 static void
accumulate_escaped(struct mixed_string_buffer * literal,int delimiter)666 accumulate_escaped (struct mixed_string_buffer *literal, int delimiter)
667 {
668 int c;
669
670 for (;;)
671 {
672 /* Use phase 3, because phase 4 elides comments. */
673 c = phase3_getc ();
674 if (c == P2_EOF || RED (c) == delimiter)
675 break;
676 if (RED (c) == '\n')
677 {
678 phase3_ungetc (c);
679 error_with_progname = false;
680 if (delimiter == '\'')
681 error (0, 0, _("%s:%d: warning: unterminated character constant"),
682 logical_file_name, line_number);
683 else
684 error (0, 0, _("%s:%d: warning: unterminated string constant"),
685 logical_file_name, line_number);
686 error_with_progname = true;
687 break;
688 }
689 if (RED (c) == '\\')
690 c = do_getc_escaped ();
691 mixed_string_buffer_append (literal, c);
692 }
693 }
694
695
696 /* Strip the common indentation of the non-blank lines of the given string and
697 remove all trailing whitespace of all lines.
698 Like the Java method String.stripIndent does.
699 <https://docs.oracle.com/en/java/javase/13/docs/api/java.base/java/lang/String.html#stripIndent()> */
700 static void
strip_indent(mixed_string_ty * ms)701 strip_indent (mixed_string_ty *ms)
702 {
703 size_t nsegments = ms->nsegments;
704 size_t minimum_indentation = SIZE_MAX;
705 {
706 size_t curr_line_indentation = 0;
707 bool curr_line_blank = true;
708 size_t i;
709
710 for (i = 0; i < nsegments; i++)
711 {
712 struct mixed_string_segment *segment = ms->segments[i];
713
714 if (segment->type == utf8_encoded
715 || (segment->type == source_encoded
716 && xgettext_current_source_encoding == po_charset_utf8))
717 {
718 /* Consider Unicode whitespace characters. */
719 size_t seglength = segment->length;
720 size_t j;
721
722 for (j = 0; j < seglength; )
723 {
724 ucs4_t uc;
725 int bytes =
726 u8_mbtouc (&uc, (const uint8_t *) &segment->contents[j],
727 seglength - j);
728 j += bytes;
729 if (uc == 0x000a)
730 {
731 /* Newline. */
732 if (!curr_line_blank)
733 if (minimum_indentation > curr_line_indentation)
734 minimum_indentation = curr_line_indentation;
735 curr_line_indentation = 0;
736 curr_line_blank = true;
737 }
738 else if (uc_is_java_whitespace (uc))
739 {
740 /* Whitespace character. */
741 if (curr_line_blank)
742 /* Every whitespace character counts as 1, even the TAB
743 character. */
744 curr_line_indentation++;
745 }
746 else
747 {
748 /* Other character. */
749 curr_line_blank = false;
750 }
751 }
752 }
753 else
754 {
755 /* When the encoding is not UTF-8, consider only ASCII whitespace
756 characters. */
757 size_t seglength = segment->length;
758 size_t j;
759
760 for (j = 0; j < seglength; j++)
761 {
762 char c = segment->contents[j];
763 if (c == '\n')
764 {
765 /* Newline. */
766 if (!curr_line_blank)
767 if (minimum_indentation > curr_line_indentation)
768 minimum_indentation = curr_line_indentation;
769 curr_line_indentation = 0;
770 curr_line_blank = true;
771 }
772 else if (c == ' '
773 || (c >= 0x09 && c <= 0x0d)
774 || (c >= 0x1c && c <= 0x1f))
775 {
776 /* Whitespace character. */
777 if (curr_line_blank)
778 /* Every whitespace character counts as 1, even the TAB
779 character. */
780 curr_line_indentation++;
781 }
782 else
783 {
784 /* Other character. */
785 curr_line_blank = false;
786 }
787 }
788 }
789 }
790 /* The indentation of the last line matters even if is blank. */
791 if (minimum_indentation > curr_line_indentation)
792 minimum_indentation = curr_line_indentation;
793 }
794
795 /* The same loop as above, but this time remove the leading
796 minimum_indentation whitespace characters and all trailing whitespace
797 characters from every line. */
798 {
799 size_t start_of_curr_line_i = 0;
800 size_t start_of_curr_line_j = 0;
801 size_t start_of_trailing_whitespace_i = 0;
802 size_t start_of_trailing_whitespace_j = 0;
803 size_t whitespace_to_remove = minimum_indentation;
804 size_t i;
805
806 for (i = 0; i < nsegments; i++)
807 {
808 struct mixed_string_segment *segment = ms->segments[i];
809 /* Perform a sliding copy from segment->contents[from_j] to
810 segment->contents[to_j]. 0 <= to_j <= from_j. */
811 size_t to_j;
812
813 if (segment->type == utf8_encoded
814 || (segment->type == source_encoded
815 && xgettext_current_source_encoding == po_charset_utf8))
816 {
817 /* Consider Unicode whitespace characters. */
818 size_t seglength = segment->length;
819 size_t from_j;
820
821 for (to_j = from_j = 0; from_j < seglength; )
822 {
823 ucs4_t uc;
824 int bytes =
825 u8_mbtouc (&uc, (const uint8_t *) &segment->contents[from_j],
826 seglength - from_j);
827 if (uc == 0x000a)
828 {
829 /* Newline. */
830 if (whitespace_to_remove > 0)
831 {
832 /* It was a blank line with fewer than minimum_indentation
833 whitespace characters. Remove all this whitespace. */
834 if (start_of_curr_line_i < i)
835 {
836 size_t k;
837 ms->segments[start_of_curr_line_i]->length = start_of_curr_line_j;
838 for (k = start_of_curr_line_i + 1; k < i; k++)
839 ms->segments[k]->length = 0;
840 to_j = 0;
841 }
842 else
843 to_j = start_of_curr_line_j;
844 }
845 else
846 {
847 /* Remove the trailing whitespace characters from the
848 current line. */
849 if (start_of_trailing_whitespace_i < i)
850 {
851 size_t k;
852 ms->segments[start_of_trailing_whitespace_i]->length = start_of_trailing_whitespace_j;
853 for (k = start_of_trailing_whitespace_i + 1; k < i; k++)
854 ms->segments[k]->length = 0;
855 to_j = 0;
856 }
857 else
858 to_j = start_of_trailing_whitespace_j;
859 }
860 }
861 if (to_j < from_j)
862 memmove (&segment->contents[to_j], &segment->contents[from_j], bytes);
863 from_j += bytes;
864 to_j += bytes;
865 if (uc == 0x000a)
866 {
867 /* Newline. */
868 start_of_curr_line_i = i;
869 start_of_curr_line_j = to_j;
870 start_of_trailing_whitespace_i = i;
871 start_of_trailing_whitespace_j = to_j;
872 whitespace_to_remove = minimum_indentation;
873 }
874 else if (uc_is_java_whitespace (uc))
875 {
876 /* Whitespace character. */
877 if (whitespace_to_remove > 0
878 && --whitespace_to_remove == 0)
879 {
880 /* Remove the leading minimum_indentation whitespace
881 characters from the current line. */
882 if (start_of_curr_line_i < i)
883 {
884 size_t k;
885 ms->segments[start_of_curr_line_i]->length = start_of_curr_line_j;
886 for (k = start_of_curr_line_i + 1; k < i; k++)
887 ms->segments[k]->length = 0;
888 to_j = 0;
889 }
890 else
891 to_j = start_of_curr_line_j;
892 }
893 }
894 else
895 {
896 /* Other character. */
897 if (whitespace_to_remove > 0)
898 abort ();
899 start_of_trailing_whitespace_i = i;
900 start_of_trailing_whitespace_j = to_j;
901 }
902 }
903 }
904 else
905 {
906 /* When the encoding is not UTF-8, consider only ASCII whitespace
907 characters. */
908 size_t seglength = segment->length;
909 size_t from_j;
910
911 for (to_j = from_j = 0; from_j < seglength; )
912 {
913 char c = segment->contents[from_j++];
914 if (c == '\n')
915 {
916 /* Newline. */
917 if (whitespace_to_remove > 0)
918 {
919 /* It was a blank line with fewer than minimum_indentation
920 whitespace characters. Remove all this whitespace. */
921 if (start_of_curr_line_i < i)
922 {
923 size_t k;
924 ms->segments[start_of_curr_line_i]->length = start_of_curr_line_j;
925 for (k = start_of_curr_line_i + 1; k < i; k++)
926 ms->segments[k]->length = 0;
927 to_j = 0;
928 }
929 else
930 to_j = start_of_curr_line_j;
931 }
932 else
933 {
934 /* Remove the trailing whitespace characters from the
935 current line. */
936 if (start_of_trailing_whitespace_i < i)
937 {
938 size_t k;
939 ms->segments[start_of_trailing_whitespace_i]->length = start_of_trailing_whitespace_j;
940 for (k = start_of_trailing_whitespace_i + 1; k < i; k++)
941 ms->segments[k]->length = 0;
942 to_j = 0;
943 }
944 else
945 to_j = start_of_trailing_whitespace_j;
946 }
947 }
948 segment->contents[to_j++] = c;
949 if (c == '\n')
950 {
951 /* Newline. */
952 start_of_curr_line_i = i;
953 start_of_curr_line_j = to_j;
954 start_of_trailing_whitespace_i = i;
955 start_of_trailing_whitespace_j = to_j;
956 whitespace_to_remove = minimum_indentation;
957 }
958 else if (c == ' '
959 || (c >= 0x09 && c <= 0x0d)
960 || (c >= 0x1c && c <= 0x1f))
961 {
962 /* Whitespace character. */
963 if (whitespace_to_remove > 0
964 && --whitespace_to_remove == 0)
965 {
966 /* Remove the leading minimum_indentation whitespace
967 characters from the current line. */
968 if (start_of_curr_line_i < i)
969 {
970 size_t k;
971 ms->segments[start_of_curr_line_i]->length = start_of_curr_line_j;
972 for (k = start_of_curr_line_i + 1; k < i; k++)
973 ms->segments[k]->length = 0;
974 to_j = 0;
975 }
976 else
977 to_j = start_of_curr_line_j;
978 }
979 }
980 else
981 {
982 /* Other character. */
983 if (whitespace_to_remove > 0)
984 abort ();
985 start_of_trailing_whitespace_i = i;
986 start_of_trailing_whitespace_j = to_j;
987 }
988 }
989 }
990 if (i + 1 == nsegments)
991 {
992 /* Handle the last line. */
993 if (whitespace_to_remove > 0)
994 {
995 /* It was a blank line with fewer than minimum_indentation
996 whitespace characters. Remove all this whitespace. */
997 if (start_of_curr_line_i < i)
998 {
999 size_t k;
1000 ms->segments[start_of_curr_line_i]->length = start_of_curr_line_j;
1001 for (k = start_of_curr_line_i + 1; k < i; k++)
1002 ms->segments[k]->length = 0;
1003 to_j = 0;
1004 }
1005 else
1006 to_j = start_of_curr_line_j;
1007 }
1008 else
1009 {
1010 /* Remove the trailing whitespace characters from the
1011 current line. */
1012 if (start_of_trailing_whitespace_i < i)
1013 {
1014 size_t k;
1015 ms->segments[start_of_trailing_whitespace_i]->length = start_of_trailing_whitespace_j;
1016 for (k = start_of_trailing_whitespace_i + 1; k < i; k++)
1017 ms->segments[k]->length = 0;
1018 to_j = 0;
1019 }
1020 else
1021 to_j = start_of_trailing_whitespace_j;
1022 }
1023 }
1024 segment->length = to_j;
1025 }
1026 }
1027 }
1028
1029
1030 /* Combine characters into tokens. Discard whitespace. */
1031
1032 static token_ty phase5_pushback[3];
1033 static int phase5_pushback_length;
1034
1035 static void
phase5_get(token_ty * tp)1036 phase5_get (token_ty *tp)
1037 {
1038 int c;
1039
1040 if (phase5_pushback_length)
1041 {
1042 *tp = phase5_pushback[--phase5_pushback_length];
1043 return;
1044 }
1045 tp->string = NULL;
1046
1047 for (;;)
1048 {
1049 tp->line_number = line_number;
1050 c = phase4_getc ();
1051
1052 if (c == P2_EOF)
1053 {
1054 tp->type = token_type_eof;
1055 return;
1056 }
1057
1058 switch (RED (c))
1059 {
1060 case '\n':
1061 if (last_non_comment_line > last_comment_line)
1062 savable_comment_reset ();
1063 /* FALLTHROUGH */
1064 case ' ':
1065 case '\t':
1066 case '\f':
1067 /* Ignore whitespace and comments. */
1068 continue;
1069 }
1070
1071 last_non_comment_line = tp->line_number;
1072
1073 switch (RED (c))
1074 {
1075 case '(':
1076 tp->type = token_type_lparen;
1077 return;
1078
1079 case ')':
1080 tp->type = token_type_rparen;
1081 return;
1082
1083 case '{':
1084 tp->type = token_type_lbrace;
1085 return;
1086
1087 case '}':
1088 tp->type = token_type_rbrace;
1089 return;
1090
1091 case ',':
1092 tp->type = token_type_comma;
1093 return;
1094
1095 case '.':
1096 c = phase4_getc ();
1097 if (!(RED (c) >= '0' && RED (c) <= '9'))
1098 {
1099 phase4_ungetc (c);
1100 tp->type = token_type_dot;
1101 return;
1102 }
1103 /* FALLTHROUGH */
1104
1105 case '0': case '1': case '2': case '3': case '4':
1106 case '5': case '6': case '7': case '8': case '9':
1107 {
1108 /* Don't need to verify the complicated syntax of integers and
1109 floating-point numbers. We assume a valid Java input.
1110 The simplified syntax that we recognize as number is: any
1111 sequence of alphanumeric characters, additionally '+' and '-'
1112 immediately after 'e' or 'E' except in hexadecimal numbers. */
1113 bool hexadecimal = false;
1114
1115 for (;;)
1116 {
1117 c = phase4_getc ();
1118 if (RED (c) >= '0' && RED (c) <= '9')
1119 continue;
1120 if ((RED (c) >= 'A' && RED (c) <= 'Z')
1121 || (RED (c) >= 'a' && RED (c) <= 'z'))
1122 {
1123 if (RED (c) == 'X' || RED (c) == 'x')
1124 hexadecimal = true;
1125 if ((RED (c) == 'E' || RED (c) == 'e') && !hexadecimal)
1126 {
1127 c = phase4_getc ();
1128 if (!(RED (c) == '+' || RED (c) == '-'))
1129 phase4_ungetc (c);
1130 }
1131 continue;
1132 }
1133 if (RED (c) == '.')
1134 continue;
1135 break;
1136 }
1137 phase4_ungetc (c);
1138 tp->type = token_type_number;
1139 return;
1140 }
1141
1142 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1143 case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1144 case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1145 case 'V': case 'W': case 'X': case 'Y': case 'Z':
1146 case '_':
1147 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1148 case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1149 case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1150 case 'v': case 'w': case 'x': case 'y': case 'z':
1151 /* Although Java allows identifiers containing many Unicode
1152 characters, we recognize only identifiers consisting of ASCII
1153 characters. This avoids conversion hassles w.r.t. the --keyword
1154 arguments, and shouldn't be a big problem in practice. */
1155 {
1156 static char *buffer;
1157 static int bufmax;
1158 int bufpos = 0;
1159 for (;;)
1160 {
1161 if (bufpos >= bufmax)
1162 {
1163 bufmax = 2 * bufmax + 10;
1164 buffer = xrealloc (buffer, bufmax);
1165 }
1166 buffer[bufpos++] = RED (c);
1167 c = phase4_getc ();
1168 if (!((RED (c) >= 'A' && RED (c) <= 'Z')
1169 || (RED (c) >= 'a' && RED (c) <= 'z')
1170 || (RED (c) >= '0' && RED (c) <= '9')
1171 || RED (c) == '_'))
1172 break;
1173 }
1174 phase4_ungetc (c);
1175 if (bufpos >= bufmax)
1176 {
1177 bufmax = 2 * bufmax + 10;
1178 buffer = xrealloc (buffer, bufmax);
1179 }
1180 buffer[bufpos] = '\0';
1181 tp->string = xstrdup (buffer);
1182 tp->type = token_type_symbol;
1183 return;
1184 }
1185
1186 case '"':
1187 {
1188 int c2 = phase3_getc ();
1189 if (c2 == '"')
1190 {
1191 int c3 = phase3_getc ();
1192 if (c3 == '"')
1193 {
1194 /* Text block. Specification:
1195 <https://docs.oracle.com/javase/specs/jls/se13/preview/text-blocks.html> */
1196 struct mixed_string_buffer block;
1197 unsigned int consecutive_unescaped_doublequotes;
1198 mixed_string_ty *block_content;
1199
1200 /* Parse the part up to and including the first newline. */
1201 for (;;)
1202 {
1203 int ic = phase3_getc ();
1204 if (ic == P2_EOF)
1205 {
1206 error_with_progname = false;
1207 error (0, 0, _("%s:%d: warning: unterminated text block"),
1208 logical_file_name, line_number);
1209 error_with_progname = true;
1210 tp->type = token_type_other;
1211 return;
1212 }
1213 if (RED (ic) == ' ' || RED (ic) == '\t' || RED (ic) == '\f')
1214 ;
1215 else if (RED (ic) == '\n')
1216 break;
1217 else
1218 {
1219 error_with_progname = false;
1220 error (0, 0, _("%s:%d: warning: invalid syntax in text block"),
1221 logical_file_name, line_number);
1222 error_with_progname = true;
1223 tp->type = token_type_other;
1224 return;
1225 }
1226 }
1227
1228 /* Parse the part after the first newline. */
1229 mixed_string_buffer_init (&block, lc_string,
1230 logical_file_name, line_number);
1231 consecutive_unescaped_doublequotes = 0;
1232 for (;;)
1233 {
1234 int ic = phase3_getc ();
1235 if (RED (ic) == '"')
1236 {
1237 consecutive_unescaped_doublequotes++;
1238 if (consecutive_unescaped_doublequotes == 3)
1239 break;
1240 }
1241 else
1242 {
1243 while (consecutive_unescaped_doublequotes > 0)
1244 {
1245 mixed_string_buffer_append (&block, '"');
1246 consecutive_unescaped_doublequotes--;
1247 }
1248 if (ic == P2_EOF)
1249 {
1250 error_with_progname = false;
1251 error (0, 0, _("%s:%d: warning: unterminated text block"),
1252 logical_file_name, block.line_number);
1253 error_with_progname = true;
1254 break;
1255 }
1256 if (RED (ic) == '\\')
1257 ic = do_getc_escaped ();
1258 mixed_string_buffer_append (&block, ic);
1259 }
1260 }
1261 block_content = mixed_string_buffer_result (&block);
1262
1263 /* Remove the common indentation from the content. */
1264 strip_indent (block_content);
1265
1266 tp->mixed_string = block_content;
1267 tp->comment = add_reference (savable_comment);
1268 tp->type = token_type_string_literal;
1269 return;
1270 }
1271 phase3_ungetc (c3);
1272 }
1273 phase3_ungetc (c2);
1274 }
1275 /* String literal. */
1276 {
1277 struct mixed_string_buffer literal;
1278
1279 mixed_string_buffer_init (&literal, lc_string,
1280 logical_file_name, line_number);
1281 accumulate_escaped (&literal, '"');
1282 tp->mixed_string = mixed_string_buffer_result (&literal);
1283 tp->comment = add_reference (savable_comment);
1284 tp->type = token_type_string_literal;
1285 return;
1286 }
1287
1288 case '\'':
1289 /* Character literal. */
1290 {
1291 struct mixed_string_buffer literal;
1292
1293 mixed_string_buffer_init (&literal, lc_outside,
1294 logical_file_name, line_number);
1295 accumulate_escaped (&literal, '\'');
1296 mixed_string_buffer_destroy (&literal);
1297 tp->type = token_type_other;
1298 return;
1299 }
1300
1301 case '+':
1302 c = phase4_getc ();
1303 if (RED (c) == '+')
1304 /* Operator ++ */
1305 tp->type = token_type_other;
1306 else if (RED (c) == '=')
1307 /* Operator += */
1308 tp->type = token_type_other;
1309 else
1310 {
1311 /* Operator + */
1312 phase4_ungetc (c);
1313 tp->type = token_type_plus;
1314 }
1315 return;
1316
1317 default:
1318 /* Misc. operator. */
1319 tp->type = token_type_other;
1320 return;
1321 }
1322 }
1323 }
1324
1325 /* Supports 3 tokens of pushback. */
1326 static void
phase5_unget(token_ty * tp)1327 phase5_unget (token_ty *tp)
1328 {
1329 if (tp->type != token_type_eof)
1330 {
1331 if (phase5_pushback_length == SIZEOF (phase5_pushback))
1332 abort ();
1333 phase5_pushback[phase5_pushback_length++] = *tp;
1334 }
1335 }
1336
1337
1338 /* Compile-time optimization of string literal concatenation.
1339 Combine "string1" + ... + "stringN" to the concatenated string if
1340 - the token before this expression is not ')' (because then the first
1341 string could be part of a cast expression),
1342 - the token after this expression is not '.' (because then the last
1343 string could be part of a method call expression). */
1344
1345 static token_ty phase6_pushback[2];
1346 static int phase6_pushback_length;
1347
1348 static token_type_ty phase6_last;
1349
1350 static void
phase6_get(token_ty * tp)1351 phase6_get (token_ty *tp)
1352 {
1353 if (phase6_pushback_length)
1354 {
1355 *tp = phase6_pushback[--phase6_pushback_length];
1356 return;
1357 }
1358
1359 phase5_get (tp);
1360 if (tp->type == token_type_string_literal && phase6_last != token_type_rparen)
1361 {
1362 mixed_string_ty *sum = tp->mixed_string;
1363
1364 for (;;)
1365 {
1366 token_ty token2;
1367
1368 phase5_get (&token2);
1369 if (token2.type == token_type_plus)
1370 {
1371 token_ty token3;
1372
1373 phase5_get (&token3);
1374 if (token3.type == token_type_string_literal)
1375 {
1376 token_ty token_after;
1377
1378 phase5_get (&token_after);
1379 if (token_after.type != token_type_dot)
1380 {
1381 sum = mixed_string_concat_free1 (sum, token3.mixed_string);
1382
1383 phase5_unget (&token_after);
1384 free_token (&token3);
1385 free_token (&token2);
1386 continue;
1387 }
1388 phase5_unget (&token_after);
1389 }
1390 phase5_unget (&token3);
1391 }
1392 phase5_unget (&token2);
1393 break;
1394 }
1395 tp->mixed_string = sum;
1396 }
1397 phase6_last = tp->type;
1398 }
1399
1400 /* Supports 2 tokens of pushback. */
1401 static void
phase6_unget(token_ty * tp)1402 phase6_unget (token_ty *tp)
1403 {
1404 if (tp->type != token_type_eof)
1405 {
1406 if (phase6_pushback_length == SIZEOF (phase6_pushback))
1407 abort ();
1408 phase6_pushback[phase6_pushback_length++] = *tp;
1409 }
1410 }
1411
1412
1413 static void
x_java_lex(token_ty * tp)1414 x_java_lex (token_ty *tp)
1415 {
1416 phase6_get (tp);
1417 }
1418
1419 /* Supports 2 tokens of pushback. */
1420 static void
x_java_unlex(token_ty * tp)1421 x_java_unlex (token_ty *tp)
1422 {
1423 phase6_unget (tp);
1424 }
1425
1426
1427 /* ========================= Extracting strings. ========================== */
1428
1429
1430 /* Context lookup table. */
1431 static flag_context_list_table_ty *flag_context_list_table;
1432
1433
1434 /* The file is broken into tokens. Scan the token stream, looking for
1435 a keyword, followed by a left paren, followed by a string. When we
1436 see this sequence, we have something to remember. We assume we are
1437 looking at a valid Java program, and leave the complaints about
1438 the grammar to the compiler.
1439
1440 Normal handling: Look for
1441 keyword ( ... msgid ... )
1442 Plural handling: Look for
1443 keyword ( ... msgid ... msgid_plural ... )
1444
1445 We use recursion because the arguments before msgid or between msgid
1446 and msgid_plural can contain subexpressions of the same form. */
1447
1448
1449 /* Extract messages until the next balanced closing parenthesis or brace,
1450 depending on TERMINATOR.
1451 Extracted messages are added to MLP.
1452 Return true upon eof, false upon closing parenthesis or brace. */
1453 static bool
extract_parenthesized(message_list_ty * mlp,token_type_ty terminator,flag_context_ty outer_context,flag_context_list_iterator_ty context_iter,struct arglist_parser * argparser)1454 extract_parenthesized (message_list_ty *mlp, token_type_ty terminator,
1455 flag_context_ty outer_context,
1456 flag_context_list_iterator_ty context_iter,
1457 struct arglist_parser *argparser)
1458 {
1459 /* Current argument number. */
1460 int arg = 1;
1461 /* 0 when no keyword has been seen. 1 right after a keyword is seen. */
1462 int state;
1463 /* Parameters of the keyword just seen. Defined only in state 1. */
1464 const struct callshapes *next_shapes = NULL;
1465 /* Context iterator that will be used if the next token is a '('. */
1466 flag_context_list_iterator_ty next_context_iter =
1467 passthrough_context_list_iterator;
1468 /* Current context. */
1469 flag_context_ty inner_context =
1470 inherited_context (outer_context,
1471 flag_context_list_iterator_advance (&context_iter));
1472
1473 /* Start state is 0. */
1474 state = 0;
1475
1476 for (;;)
1477 {
1478 token_ty token;
1479
1480 x_java_lex (&token);
1481 switch (token.type)
1482 {
1483 case token_type_symbol:
1484 {
1485 /* Combine symbol1 . ... . symbolN to a single strings, so that
1486 we can recognize static function calls like
1487 GettextResource.gettext. The information present for
1488 symbolI.....symbolN has precedence over the information for
1489 symbolJ.....symbolN with J > I. */
1490 char *sum = token.string;
1491 size_t sum_len = strlen (sum);
1492 const char *dottedname;
1493 flag_context_list_ty *context_list;
1494
1495 for (;;)
1496 {
1497 token_ty token2;
1498
1499 x_java_lex (&token2);
1500 if (token2.type == token_type_dot)
1501 {
1502 token_ty token3;
1503
1504 x_java_lex (&token3);
1505 if (token3.type == token_type_symbol)
1506 {
1507 char *addend = token3.string;
1508 size_t addend_len = strlen (addend);
1509
1510 sum =
1511 (char *) xrealloc (sum, sum_len + 1 + addend_len + 1);
1512 sum[sum_len] = '.';
1513 memcpy (sum + sum_len + 1, addend, addend_len + 1);
1514 sum_len += 1 + addend_len;
1515
1516 free_token (&token3);
1517 free_token (&token2);
1518 continue;
1519 }
1520 x_java_unlex (&token3);
1521 }
1522 x_java_unlex (&token2);
1523 break;
1524 }
1525
1526 for (dottedname = sum;;)
1527 {
1528 void *keyword_value;
1529
1530 if (hash_find_entry (&keywords, dottedname, strlen (dottedname),
1531 &keyword_value)
1532 == 0)
1533 {
1534 next_shapes = (const struct callshapes *) keyword_value;
1535 state = 1;
1536 break;
1537 }
1538
1539 dottedname = strchr (dottedname, '.');
1540 if (dottedname == NULL)
1541 {
1542 state = 0;
1543 break;
1544 }
1545 dottedname++;
1546 }
1547
1548 for (dottedname = sum;;)
1549 {
1550 context_list =
1551 flag_context_list_table_lookup (
1552 flag_context_list_table,
1553 dottedname, strlen (dottedname));
1554 if (context_list != NULL)
1555 break;
1556
1557 dottedname = strchr (dottedname, '.');
1558 if (dottedname == NULL)
1559 break;
1560 dottedname++;
1561 }
1562 next_context_iter = flag_context_list_iterator (context_list);
1563
1564 free (sum);
1565 continue;
1566 }
1567
1568 case token_type_lparen:
1569 if (extract_parenthesized (mlp, token_type_rparen,
1570 inner_context, next_context_iter,
1571 arglist_parser_alloc (mlp,
1572 state ? next_shapes : NULL)))
1573 {
1574 arglist_parser_done (argparser, arg);
1575 return true;
1576 }
1577 next_context_iter = null_context_list_iterator;
1578 state = 0;
1579 continue;
1580
1581 case token_type_rparen:
1582 if (terminator == token_type_rparen)
1583 {
1584 arglist_parser_done (argparser, arg);
1585 return false;
1586 }
1587 if (terminator == token_type_rbrace)
1588 {
1589 error_with_progname = false;
1590 error (0, 0,
1591 _("%s:%d: warning: ')' found where '}' was expected"),
1592 logical_file_name, token.line_number);
1593 error_with_progname = true;
1594 }
1595 next_context_iter = null_context_list_iterator;
1596 state = 0;
1597 continue;
1598
1599 case token_type_lbrace:
1600 if (extract_parenthesized (mlp, token_type_rbrace,
1601 null_context, null_context_list_iterator,
1602 arglist_parser_alloc (mlp, NULL)))
1603 {
1604 arglist_parser_done (argparser, arg);
1605 return true;
1606 }
1607 next_context_iter = null_context_list_iterator;
1608 state = 0;
1609 continue;
1610
1611 case token_type_rbrace:
1612 if (terminator == token_type_rbrace)
1613 {
1614 arglist_parser_done (argparser, arg);
1615 return false;
1616 }
1617 if (terminator == token_type_rparen)
1618 {
1619 error_with_progname = false;
1620 error (0, 0,
1621 _("%s:%d: warning: '}' found where ')' was expected"),
1622 logical_file_name, token.line_number);
1623 error_with_progname = true;
1624 }
1625 next_context_iter = null_context_list_iterator;
1626 state = 0;
1627 continue;
1628
1629 case token_type_comma:
1630 arg++;
1631 inner_context =
1632 inherited_context (outer_context,
1633 flag_context_list_iterator_advance (
1634 &context_iter));
1635 next_context_iter = passthrough_context_list_iterator;
1636 state = 0;
1637 continue;
1638
1639 case token_type_string_literal:
1640 {
1641 lex_pos_ty pos;
1642
1643 pos.file_name = logical_file_name;
1644 pos.line_number = token.line_number;
1645
1646 if (extract_all)
1647 {
1648 char *string = mixed_string_contents (token.mixed_string);
1649 mixed_string_free (token.mixed_string);
1650 remember_a_message (mlp, NULL, string, true, false,
1651 inner_context, &pos,
1652 NULL, token.comment, true);
1653 }
1654 else
1655 arglist_parser_remember (argparser, arg, token.mixed_string,
1656 inner_context,
1657 pos.file_name, pos.line_number,
1658 token.comment, true);
1659 }
1660 drop_reference (token.comment);
1661 next_context_iter = null_context_list_iterator;
1662 state = 0;
1663 continue;
1664
1665 case token_type_eof:
1666 arglist_parser_done (argparser, arg);
1667 return true;
1668
1669 case token_type_dot:
1670 case token_type_number:
1671 case token_type_plus:
1672 case token_type_other:
1673 next_context_iter = null_context_list_iterator;
1674 state = 0;
1675 continue;
1676
1677 default:
1678 abort ();
1679 }
1680 }
1681 }
1682
1683
1684 void
extract_java(FILE * f,const char * real_filename,const char * logical_filename,flag_context_list_table_ty * flag_table,msgdomain_list_ty * mdlp)1685 extract_java (FILE *f,
1686 const char *real_filename, const char *logical_filename,
1687 flag_context_list_table_ty *flag_table,
1688 msgdomain_list_ty *mdlp)
1689 {
1690 message_list_ty *mlp = mdlp->item[0]->messages;
1691
1692 fp = f;
1693 real_file_name = real_filename;
1694 logical_file_name = xstrdup (logical_filename);
1695 line_number = 1;
1696
1697 phase1_pushback_length = 0;
1698 phase2_pushback_length = 0;
1699 phase3_pushback_length = 0;
1700
1701 last_comment_line = -1;
1702 last_non_comment_line = -1;
1703
1704 phase5_pushback_length = 0;
1705 phase6_pushback_length = 0;
1706 phase6_last = token_type_eof;
1707
1708 flag_context_list_table = flag_table;
1709
1710 init_keywords ();
1711
1712 /* Eat tokens until eof is seen. When extract_parenthesized returns
1713 due to an unbalanced closing parenthesis, just restart it. */
1714 while (!extract_parenthesized (mlp, token_type_eof,
1715 null_context, null_context_list_iterator,
1716 arglist_parser_alloc (mlp, NULL)))
1717 ;
1718
1719 fp = NULL;
1720 real_file_name = NULL;
1721 logical_file_name = NULL;
1722 line_number = 0;
1723 }
1724