• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Reading binary .mo files.
2    Copyright (C) 1995-1998, 2000-2007, 2014-2015, 2017, 2020 Free Software Foundation, Inc.
3    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17 
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21 
22 /* Specification.  */
23 #include "read-mo.h"
24 
25 #include <errno.h>
26 #include <stdbool.h>
27 #include <stdio.h>
28 #include <stddef.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 /* These two include files describe the binary .mo format.  */
33 #include "gmo.h"
34 #include "hash-string.h"
35 
36 #include "error.h"
37 #include "xalloc.h"
38 #include "binary-io.h"
39 #include "message.h"
40 #include "format.h"
41 #include "gettext.h"
42 #include "xsize.h"
43 
44 #define _(str) gettext (str)
45 
46 
/* Byte order in which the 32-bit numbers of a .mo file are stored.  */
enum mo_endianness
{
  MO_LITTLE_ENDIAN = 0,         /* least significant byte comes first */
  MO_BIG_ENDIAN = 1             /* most significant byte comes first */
};
52 
53 /* We read the file completely into memory.  This is more efficient than
54    lots of lseek().  This struct represents the .mo file in memory.  */
55 struct binary_mo_file
56 {
57   const char *filename;
58   char *data;
59   size_t size;
60   enum mo_endianness endian;
61 };
62 
63 
64 /* Read the contents of the given input stream.  */
65 static void
read_binary_mo_file(struct binary_mo_file * bfp,FILE * fp,const char * filename)66 read_binary_mo_file (struct binary_mo_file *bfp,
67                      FILE *fp, const char *filename)
68 {
69   char *buf = NULL;
70   size_t alloc = 0;
71   size_t size = 0;
72   size_t count;
73 
74   while (!feof (fp))
75     {
76       const size_t increment = 4096;
77       if (size + increment > alloc)
78         {
79           alloc = alloc + alloc / 2;
80           if (alloc < size + increment)
81             alloc = size + increment;
82           buf = (char *) xrealloc (buf, alloc);
83         }
84       count = fread (buf + size, 1, increment, fp);
85       if (count == 0)
86         {
87           if (ferror (fp))
88             error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
89                    filename);
90         }
91       else
92         size += count;
93     }
94   buf = (char *) xrealloc (buf, size);
95   bfp->filename = filename;
96   bfp->data = buf;
97   bfp->size = size;
98 }
99 
100 /* Get a 32-bit number from the file, at the given file position.  */
101 static nls_uint32
get_uint32(const struct binary_mo_file * bfp,size_t offset)102 get_uint32 (const struct binary_mo_file *bfp, size_t offset)
103 {
104   nls_uint32 b0, b1, b2, b3;
105   size_t end = xsum (offset, 4);
106 
107   if (size_overflow_p (end) || end > bfp->size)
108     error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
109 
110   b0 = *(unsigned char *) (bfp->data + offset + 0);
111   b1 = *(unsigned char *) (bfp->data + offset + 1);
112   b2 = *(unsigned char *) (bfp->data + offset + 2);
113   b3 = *(unsigned char *) (bfp->data + offset + 3);
114   if (bfp->endian == MO_LITTLE_ENDIAN)
115     return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
116   else
117     return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
118 }
119 
120 /* Get a static string from the file, at the given file position.  */
121 static char *
get_string(const struct binary_mo_file * bfp,size_t offset,size_t * lengthp)122 get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp)
123 {
124   /* See 'struct string_desc'.  */
125   nls_uint32 s_length = get_uint32 (bfp, offset);
126   nls_uint32 s_offset = get_uint32 (bfp, offset + 4);
127   size_t s_end = xsum3 (s_offset, s_length, 1);
128 
129   if (size_overflow_p (s_end) || s_end > bfp->size)
130     error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
131   if (bfp->data[s_offset + s_length] != '\0')
132     error (EXIT_FAILURE, 0,
133            _("file \"%s\" contains a not NUL terminated string"),
134            bfp->filename);
135 
136   *lengthp = s_length + 1;
137   return bfp->data + s_offset;
138 }
139 
140 /* Get a system dependent string from the file, at the given file position.  */
141 static char *
get_sysdep_string(const struct binary_mo_file * bfp,size_t offset,const struct mo_file_header * header,size_t * lengthp)142 get_sysdep_string (const struct binary_mo_file *bfp, size_t offset,
143                    const struct mo_file_header *header, size_t *lengthp)
144 {
145   /* See 'struct sysdep_string'.  */
146   size_t length;
147   char *string;
148   size_t i;
149   char *p;
150   nls_uint32 s_offset;
151 
152   /* Compute the length.  */
153   s_offset = get_uint32 (bfp, offset);
154   length = 0;
155   for (i = 4; ; i += 8)
156     {
157       nls_uint32 segsize = get_uint32 (bfp, offset + i);
158       nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
159       nls_uint32 sysdep_segment_offset;
160       nls_uint32 ss_length;
161       nls_uint32 ss_offset;
162       size_t ss_end;
163       size_t s_end;
164       size_t n;
165 
166       s_end = xsum (s_offset, segsize);
167       if (size_overflow_p (s_end) || s_end > bfp->size)
168         error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
169       length += segsize;
170       s_offset += segsize;
171 
172       if (sysdepref == SEGMENTS_END)
173         {
174           /* The last static segment must end in a NUL.  */
175           if (!(segsize > 0 && bfp->data[s_offset - 1] == '\0'))
176             /* Invalid.  */
177             error (EXIT_FAILURE, 0,
178                    _("file \"%s\" contains a not NUL terminated system dependent string"),
179                    bfp->filename);
180           break;
181         }
182       if (sysdepref >= header->n_sysdep_segments)
183         /* Invalid.  */
184         error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
185                bfp->filename);
186       /* See 'struct sysdep_segment'.  */
187       sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
188       ss_length = get_uint32 (bfp, sysdep_segment_offset);
189       ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
190       ss_end = xsum (ss_offset, ss_length);
191       if (size_overflow_p (ss_end) || ss_end > bfp->size)
192         error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename);
193       if (!(ss_length > 0 && bfp->data[ss_end - 1] == '\0'))
194         {
195           char location[30];
196           sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref);
197           error (EXIT_FAILURE, 0,
198                  _("file \"%s\" contains a not NUL terminated string, at %s"),
199                  bfp->filename, location);
200         }
201       n = strlen (bfp->data + ss_offset);
202       length += (n > 1 ? 1 + n + 1 : n);
203     }
204 
205   /* Allocate and fill the string.  */
206   string = XNMALLOC (length, char);
207   p = string;
208   s_offset = get_uint32 (bfp, offset);
209   for (i = 4; ; i += 8)
210     {
211       nls_uint32 segsize = get_uint32 (bfp, offset + i);
212       nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4);
213       nls_uint32 sysdep_segment_offset;
214       nls_uint32 ss_length;
215       nls_uint32 ss_offset;
216       size_t n;
217 
218       memcpy (p, bfp->data + s_offset, segsize);
219       p += segsize;
220       s_offset += segsize;
221 
222       if (sysdepref == SEGMENTS_END)
223         break;
224       if (sysdepref >= header->n_sysdep_segments)
225         abort ();
226       /* See 'struct sysdep_segment'.  */
227       sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8;
228       ss_length = get_uint32 (bfp, sysdep_segment_offset);
229       ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4);
230       if (ss_offset + ss_length > bfp->size)
231         abort ();
232       if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0'))
233         abort ();
234       n = strlen (bfp->data + ss_offset);
235       if (n > 1)
236         *p++ = '<';
237       memcpy (p, bfp->data + ss_offset, n);
238       p += n;
239       if (n > 1)
240         *p++ = '>';
241     }
242 
243   if (p != string + length)
244     abort ();
245 
246   *lengthp = length;
247   return string;
248 }
249 
250 /* Reads an existing .mo file and adds the messages to mlp.  */
251 void
read_mo_file(message_list_ty * mlp,const char * filename)252 read_mo_file (message_list_ty *mlp, const char *filename)
253 {
254   FILE *fp;
255   struct binary_mo_file bf;
256   struct mo_file_header header;
257   unsigned int i;
258   static lex_pos_ty pos = { __FILE__, __LINE__ };
259 
260   if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0)
261     {
262       fp = stdin;
263       SET_BINARY (fileno (fp));
264     }
265   else
266     {
267       fp = fopen (filename, "rb");
268       if (fp == NULL)
269         error (EXIT_FAILURE, errno,
270                _("error while opening \"%s\" for reading"), filename);
271     }
272 
273   /* Read the file contents into memory.  */
274   read_binary_mo_file (&bf, fp, filename);
275 
276   /* Get a 32-bit number from the file header.  */
277 # define GET_HEADER_FIELD(field) \
278     get_uint32 (&bf, offsetof (struct mo_file_header, field))
279 
280   /* We must grope the file to determine which endian it is.
281      Perversity of the universe tends towards maximum, so it will
282      probably not match the currently executing architecture.  */
283   bf.endian = MO_BIG_ENDIAN;
284   header.magic = GET_HEADER_FIELD (magic);
285   if (header.magic != _MAGIC)
286     {
287       bf.endian = MO_LITTLE_ENDIAN;
288       header.magic = GET_HEADER_FIELD (magic);
289       if (header.magic != _MAGIC)
290         {
291         unrecognised:
292           error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
293                  filename);
294         }
295     }
296 
297   header.revision = GET_HEADER_FIELD (revision);
298 
299   /* We support only the major revisions 0 and 1.  */
300   switch (header.revision >> 16)
301     {
302     case 0:
303     case 1:
304       /* Fill the header parts that apply to major revisions 0 and 1.  */
305       header.nstrings = GET_HEADER_FIELD (nstrings);
306       header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset);
307       header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset);
308       header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size);
309       header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset);
310 
311       /* The following verifications attempt to ensure that 'msgunfmt' complains
312          about a .mo file that may make libintl crash at run time.  */
313 
314       /* Verify that the array of messages is sorted.  */
315       {
316         char *prev_msgid = NULL;
317 
318         for (i = 0; i < header.nstrings; i++)
319           {
320             char *msgid;
321             size_t msgid_len;
322 
323             msgid = get_string (&bf, header.orig_tab_offset + i * 8,
324                                 &msgid_len);
325             if (i == 0)
326               prev_msgid = msgid;
327             else
328               {
329                 if (!(strcmp (prev_msgid, msgid) < 0))
330                   error (EXIT_FAILURE, 0,
331                          _("file \"%s\" is not in GNU .mo format: The array of messages is not sorted."),
332                          filename);
333               }
334           }
335       }
336 
337       /* Verify the hash table.  */
338       if (header.hash_tab_size > 0)
339         {
340           char *seen;
341           unsigned int j;
342 
343           /* Verify the hash table's size.  */
344           if (!(header.hash_tab_size > 2))
345             error (EXIT_FAILURE, 0,
346                    _("file \"%s\" is not in GNU .mo format: The hash table size is invalid."),
347                    filename);
348 
349           /* Verify that the non-empty hash table entries contain the values
350              1, ..., nstrings, each exactly once.  */
351           seen = (char *) xcalloc (header.nstrings, 1);
352           for (j = 0; j < header.hash_tab_size; j++)
353             {
354               nls_uint32 entry =
355                 get_uint32 (&bf, header.hash_tab_offset + j * 4);
356 
357               if (entry != 0)
358                 {
359                   i = entry - 1;
360                   if (!(i < header.nstrings && seen[i] == 0))
361                     error (EXIT_FAILURE, 0,
362                            _("file \"%s\" is not in GNU .mo format: The hash table contains invalid entries."),
363                            filename);
364                   seen[i] = 1;
365                 }
366             }
367           for (i = 0; i < header.nstrings; i++)
368             if (seen[i] == 0)
369               error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format: Some messages are not present in the hash table."),
370                      filename);
371           free (seen);
372 
373           /* Verify that the hash table lookup algorithm finds the entry for
374              each message.  */
375           for (i = 0; i < header.nstrings; i++)
376             {
377               size_t msgid_len;
378               char *msgid = get_string (&bf, header.orig_tab_offset + i * 8,
379                                         &msgid_len);
380               nls_uint32 hash_val = hash_string (msgid);
381               nls_uint32 idx = hash_val % header.hash_tab_size;
382               nls_uint32 incr = 1 + (hash_val % (header.hash_tab_size - 2));
383               for (;;)
384                 {
385                   nls_uint32 entry =
386                     get_uint32 (&bf, header.hash_tab_offset + idx * 4);
387 
388                   if (entry == 0)
389                     error (EXIT_FAILURE, 0,
390                            _("file \"%s\" is not in GNU .mo format: Some messages are at a wrong index in the hash table."),
391                            filename);
392                   if (entry == i + 1)
393                     break;
394 
395                   if (idx >= header.hash_tab_size - incr)
396                     idx -= header.hash_tab_size - incr;
397                   else
398                     idx += incr;
399                 }
400             }
401         }
402 
403       for (i = 0; i < header.nstrings; i++)
404         {
405           message_ty *mp;
406           char *msgctxt;
407           char *msgid;
408           size_t msgid_len;
409           char *separator;
410           char *msgstr;
411           size_t msgstr_len;
412 
413           /* Read the msgctxt and msgid.  */
414           msgid = get_string (&bf, header.orig_tab_offset + i * 8,
415                               &msgid_len);
416           /* Split into msgctxt and msgid.  */
417           separator = strchr (msgid, MSGCTXT_SEPARATOR);
418           if (separator != NULL)
419             {
420               /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
421               *separator = '\0';
422               msgctxt = msgid;
423               msgid = separator + 1;
424               msgid_len -= msgid - msgctxt;
425             }
426           else
427             msgctxt = NULL;
428 
429           /* Read the msgstr.  */
430           msgstr = get_string (&bf, header.trans_tab_offset + i * 8,
431                                &msgstr_len);
432 
433           mp = message_alloc (msgctxt,
434                               msgid,
435                               (strlen (msgid) + 1 < msgid_len
436                                ? msgid + strlen (msgid) + 1
437                                : NULL),
438                               msgstr, msgstr_len,
439                               &pos);
440           message_list_append (mlp, mp);
441         }
442 
443       switch (header.revision & 0xffff)
444         {
445         case 0:
446           break;
447         case 1:
448         default:
449           /* Fill the header parts that apply to minor revision >= 1.  */
450           header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments);
451           header.sysdep_segments_offset =
452             GET_HEADER_FIELD (sysdep_segments_offset);
453           header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings);
454           header.orig_sysdep_tab_offset =
455             GET_HEADER_FIELD (orig_sysdep_tab_offset);
456           header.trans_sysdep_tab_offset =
457             GET_HEADER_FIELD (trans_sysdep_tab_offset);
458 
459           for (i = 0; i < header.n_sysdep_strings; i++)
460             {
461               message_ty *mp;
462               char *msgctxt;
463               char *msgid;
464               size_t msgid_len;
465               char *separator;
466               char *msgstr;
467               size_t msgstr_len;
468               nls_uint32 offset;
469               size_t f;
470 
471               /* Read the msgctxt and msgid.  */
472               offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4);
473               msgid = get_sysdep_string (&bf, offset, &header, &msgid_len);
474               /* Split into msgctxt and msgid.  */
475               separator = strchr (msgid, MSGCTXT_SEPARATOR);
476               if (separator != NULL)
477                 {
478                   /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
479                   *separator = '\0';
480                   msgctxt = msgid;
481                   msgid = separator + 1;
482                   msgid_len -= msgid - msgctxt;
483                 }
484               else
485                 msgctxt = NULL;
486 
487               /* Read the msgstr.  */
488               offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4);
489               msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len);
490 
491               mp = message_alloc (msgctxt,
492                                   msgid,
493                                   (strlen (msgid) + 1 < msgid_len
494                                    ? msgid + strlen (msgid) + 1
495                                    : NULL),
496                                   msgstr, msgstr_len,
497                                   &pos);
498 
499               /* Only messages with c-format or objc-format annotation are
500                  recognized as having system-dependent strings by msgfmt.
501                  Which one of the two, we don't know.  We have to guess,
502                  assuming that c-format is more probable than objc-format and
503                  that the .mo was likely produced by "msgfmt -c".  */
504               for (f = format_c; ; f = format_objc)
505                 {
506                   bool valid = true;
507                   struct formatstring_parser *parser = formatstring_parsers[f];
508                   const char *str_end;
509                   const char *str;
510 
511                   str_end = msgid + msgid_len;
512                   for (str = msgid; str < str_end; str += strlen (str) + 1)
513                     {
514                       char *invalid_reason = NULL;
515                       void *descr =
516                         parser->parse (str, false, NULL, &invalid_reason);
517 
518                       if (descr != NULL)
519                         parser->free (descr);
520                       else
521                         {
522                           free (invalid_reason);
523                           valid = false;
524                           break;
525                         }
526                     }
527                   if (valid)
528                     {
529                       str_end = msgstr + msgstr_len;
530                       for (str = msgstr; str < str_end; str += strlen (str) + 1)
531                         {
532                           char *invalid_reason = NULL;
533                           void *descr =
534                             parser->parse (str, true, NULL, &invalid_reason);
535 
536                           if (descr != NULL)
537                             parser->free (descr);
538                           else
539                             {
540                               free (invalid_reason);
541                               valid = false;
542                               break;
543                             }
544                         }
545                     }
546 
547                   if (valid)
548                     {
549                       /* Found the most likely among c-format, objc-format.  */
550                       mp->is_format[f] = yes;
551                       break;
552                     }
553 
554                   /* Try next f.  */
555                   if (f == format_objc)
556                     break;
557                 }
558 
559               message_list_append (mlp, mp);
560             }
561           break;
562         }
563       break;
564 
565     default:
566       goto unrecognised;
567     }
568 
569   if (fp != stdin)
570     fclose (fp);
571 }
572