/* Reading binary .mo files. Copyright (C) 1995-1998, 2000-2007, 2014-2015, 2017, 2020 Free Software Foundation, Inc. Written by Ulrich Drepper , April 1995. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifdef HAVE_CONFIG_H # include #endif /* Specification. */ #include "read-mo.h" #include #include #include #include #include #include /* These two include files describe the binary .mo format. */ #include "gmo.h" #include "hash-string.h" #include "error.h" #include "xalloc.h" #include "binary-io.h" #include "message.h" #include "format.h" #include "gettext.h" #include "xsize.h" #define _(str) gettext (str) enum mo_endianness { MO_LITTLE_ENDIAN, MO_BIG_ENDIAN }; /* We read the file completely into memory. This is more efficient than lots of lseek(). This struct represents the .mo file in memory. */ struct binary_mo_file { const char *filename; char *data; size_t size; enum mo_endianness endian; }; /* Read the contents of the given input stream. 
*/ static void read_binary_mo_file (struct binary_mo_file *bfp, FILE *fp, const char *filename) { char *buf = NULL; size_t alloc = 0; size_t size = 0; size_t count; while (!feof (fp)) { const size_t increment = 4096; if (size + increment > alloc) { alloc = alloc + alloc / 2; if (alloc < size + increment) alloc = size + increment; buf = (char *) xrealloc (buf, alloc); } count = fread (buf + size, 1, increment, fp); if (count == 0) { if (ferror (fp)) error (EXIT_FAILURE, errno, _("error while reading \"%s\""), filename); } else size += count; } buf = (char *) xrealloc (buf, size); bfp->filename = filename; bfp->data = buf; bfp->size = size; } /* Get a 32-bit number from the file, at the given file position. */ static nls_uint32 get_uint32 (const struct binary_mo_file *bfp, size_t offset) { nls_uint32 b0, b1, b2, b3; size_t end = xsum (offset, 4); if (size_overflow_p (end) || end > bfp->size) error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); b0 = *(unsigned char *) (bfp->data + offset + 0); b1 = *(unsigned char *) (bfp->data + offset + 1); b2 = *(unsigned char *) (bfp->data + offset + 2); b3 = *(unsigned char *) (bfp->data + offset + 3); if (bfp->endian == MO_LITTLE_ENDIAN) return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); else return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3; } /* Get a static string from the file, at the given file position. */ static char * get_string (const struct binary_mo_file *bfp, size_t offset, size_t *lengthp) { /* See 'struct string_desc'. 
*/ nls_uint32 s_length = get_uint32 (bfp, offset); nls_uint32 s_offset = get_uint32 (bfp, offset + 4); size_t s_end = xsum3 (s_offset, s_length, 1); if (size_overflow_p (s_end) || s_end > bfp->size) error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); if (bfp->data[s_offset + s_length] != '\0') error (EXIT_FAILURE, 0, _("file \"%s\" contains a not NUL terminated string"), bfp->filename); *lengthp = s_length + 1; return bfp->data + s_offset; } /* Get a system dependent string from the file, at the given file position. */ static char * get_sysdep_string (const struct binary_mo_file *bfp, size_t offset, const struct mo_file_header *header, size_t *lengthp) { /* See 'struct sysdep_string'. */ size_t length; char *string; size_t i; char *p; nls_uint32 s_offset; /* Compute the length. */ s_offset = get_uint32 (bfp, offset); length = 0; for (i = 4; ; i += 8) { nls_uint32 segsize = get_uint32 (bfp, offset + i); nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4); nls_uint32 sysdep_segment_offset; nls_uint32 ss_length; nls_uint32 ss_offset; size_t ss_end; size_t s_end; size_t n; s_end = xsum (s_offset, segsize); if (size_overflow_p (s_end) || s_end > bfp->size) error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); length += segsize; s_offset += segsize; if (sysdepref == SEGMENTS_END) { /* The last static segment must end in a NUL. */ if (!(segsize > 0 && bfp->data[s_offset - 1] == '\0')) /* Invalid. */ error (EXIT_FAILURE, 0, _("file \"%s\" contains a not NUL terminated system dependent string"), bfp->filename); break; } if (sysdepref >= header->n_sysdep_segments) /* Invalid. */ error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"), bfp->filename); /* See 'struct sysdep_segment'. 
*/ sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8; ss_length = get_uint32 (bfp, sysdep_segment_offset); ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4); ss_end = xsum (ss_offset, ss_length); if (size_overflow_p (ss_end) || ss_end > bfp->size) error (EXIT_FAILURE, 0, _("file \"%s\" is truncated"), bfp->filename); if (!(ss_length > 0 && bfp->data[ss_end - 1] == '\0')) { char location[30]; sprintf (location, "sysdep_segment[%u]", (unsigned int) sysdepref); error (EXIT_FAILURE, 0, _("file \"%s\" contains a not NUL terminated string, at %s"), bfp->filename, location); } n = strlen (bfp->data + ss_offset); length += (n > 1 ? 1 + n + 1 : n); } /* Allocate and fill the string. */ string = XNMALLOC (length, char); p = string; s_offset = get_uint32 (bfp, offset); for (i = 4; ; i += 8) { nls_uint32 segsize = get_uint32 (bfp, offset + i); nls_uint32 sysdepref = get_uint32 (bfp, offset + i + 4); nls_uint32 sysdep_segment_offset; nls_uint32 ss_length; nls_uint32 ss_offset; size_t n; memcpy (p, bfp->data + s_offset, segsize); p += segsize; s_offset += segsize; if (sysdepref == SEGMENTS_END) break; if (sysdepref >= header->n_sysdep_segments) abort (); /* See 'struct sysdep_segment'. */ sysdep_segment_offset = header->sysdep_segments_offset + sysdepref * 8; ss_length = get_uint32 (bfp, sysdep_segment_offset); ss_offset = get_uint32 (bfp, sysdep_segment_offset + 4); if (ss_offset + ss_length > bfp->size) abort (); if (!(ss_length > 0 && bfp->data[ss_offset + ss_length - 1] == '\0')) abort (); n = strlen (bfp->data + ss_offset); if (n > 1) *p++ = '<'; memcpy (p, bfp->data + ss_offset, n); p += n; if (n > 1) *p++ = '>'; } if (p != string + length) abort (); *lengthp = length; return string; } /* Reads an existing .mo file and adds the messages to mlp. 
*/
/* FILENAME "-" or "/dev/stdin" selects standard input, switched to binary
   mode; any other name is opened with fopen.  The whole file is read into
   memory, its byte order is detected from the magic number, and its tables
   are verified before the messages are appended to MLP.  I/O errors and
   format violations are reported through error (EXIT_FAILURE, ...).
   The in-memory copy of the file is not freed here: the msgid and msgstr
   pointers handed to message_alloc for static strings point into it.  */
void
read_mo_file (message_list_ty *mlp, const char *filename)
{
  FILE *fp;
  struct binary_mo_file bf;
  struct mo_file_header header;
  unsigned int i;
  static lex_pos_ty pos = { __FILE__, __LINE__ };

  if (strcmp (filename, "-") == 0 || strcmp (filename, "/dev/stdin") == 0)
    {
      fp = stdin;
      SET_BINARY (fileno (fp));
    }
  else
    {
      fp = fopen (filename, "rb");
      if (fp == NULL)
        error (EXIT_FAILURE, errno,
               _("error while opening \"%s\" for reading"), filename);
    }

  /* Read the file contents into memory.  */
  read_binary_mo_file (&bf, fp, filename);

  /* Get a 32-bit number from the file header.  */
# define GET_HEADER_FIELD(field) \
    get_uint32 (&bf, offsetof (struct mo_file_header, field))

  /* We must grope the file to determine which endian it is.
     Perversity of the universe tends towards maximum, so it will
     probably not match the currently executing architecture.  */
  bf.endian = MO_BIG_ENDIAN;
  header.magic = GET_HEADER_FIELD (magic);
  if (header.magic != _MAGIC)
    {
      bf.endian = MO_LITTLE_ENDIAN;
      header.magic = GET_HEADER_FIELD (magic);
      if (header.magic != _MAGIC)
        {
        unrecognised:
          error (EXIT_FAILURE, 0, _("file \"%s\" is not in GNU .mo format"),
                 filename);
        }
    }

  header.revision = GET_HEADER_FIELD (revision);

  /* We support only the major revisions 0 and 1.  */
  switch (header.revision >> 16)
    {
    case 0:
    case 1:
      /* Fill the header parts that apply to major revisions 0 and 1.  */
      header.nstrings = GET_HEADER_FIELD (nstrings);
      header.orig_tab_offset = GET_HEADER_FIELD (orig_tab_offset);
      header.trans_tab_offset = GET_HEADER_FIELD (trans_tab_offset);
      header.hash_tab_size = GET_HEADER_FIELD (hash_tab_size);
      header.hash_tab_offset = GET_HEADER_FIELD (hash_tab_offset);

      /* The following verifications attempt to ensure that 'msgunfmt'
         complains about a .mo file that may make libintl crash at run
         time.  */

      /* Verify that the array of messages is sorted.  */
      {
        char *prev_msgid = NULL;

        for (i = 0; i < header.nstrings; i++)
          {
            char *msgid;
            size_t msgid_len;

            msgid = get_string (&bf, header.orig_tab_offset + i * 8,
                                &msgid_len);
            /* Each msgid must be strictly greater than its immediate
               predecessor.  */
            if (i > 0 && !(strcmp (prev_msgid, msgid) < 0))
              error (EXIT_FAILURE, 0,
                     _("file \"%s\" is not in GNU .mo format: The array of messages is not sorted."),
                     filename);
            /* Remember this msgid for the next iteration.  Without this
               update every msgid would only be compared against the first
               one, and an unsorted table could pass the check.  */
            prev_msgid = msgid;
          }
      }

      /* Verify the hash table.  */
      if (header.hash_tab_size > 0)
        {
          char *seen;
          unsigned int j;

          /* Verify the hash table's size.  The probing increment below is
             computed modulo hash_tab_size - 2, so the size must exceed 2.  */
          if (!(header.hash_tab_size > 2))
            error (EXIT_FAILURE, 0,
                   _("file \"%s\" is not in GNU .mo format: The hash table size is invalid."),
                   filename);

          /* Verify that the non-empty hash table entries contain the values
             1, ..., nstrings, each exactly once.  */
          seen = (char *) xcalloc (header.nstrings, 1);
          for (j = 0; j < header.hash_tab_size; j++)
            {
              nls_uint32 entry =
                get_uint32 (&bf, header.hash_tab_offset + j * 4);

              if (entry != 0)
                {
                  i = entry - 1;
                  if (!(i < header.nstrings && seen[i] == 0))
                    error (EXIT_FAILURE, 0,
                           _("file \"%s\" is not in GNU .mo format: The hash table contains invalid entries."),
                           filename);
                  seen[i] = 1;
                }
            }
          for (i = 0; i < header.nstrings; i++)
            if (seen[i] == 0)
              error (EXIT_FAILURE, 0,
                     _("file \"%s\" is not in GNU .mo format: Some messages are not present in the hash table."),
                     filename);
          free (seen);

          /* Verify that the hash table lookup algorithm finds the entry for
             each message.  */
          for (i = 0; i < header.nstrings; i++)
            {
              size_t msgid_len;
              char *msgid =
                get_string (&bf, header.orig_tab_offset + i * 8, &msgid_len);
              nls_uint32 hash_val = hash_string (msgid);
              nls_uint32 idx = hash_val % header.hash_tab_size;
              nls_uint32 incr = 1 + (hash_val % (header.hash_tab_size - 2));

              for (;;)
                {
                  nls_uint32 entry =
                    get_uint32 (&bf, header.hash_tab_offset + idx * 4);

                  if (entry == 0)
                    error (EXIT_FAILURE, 0,
                           _("file \"%s\" is not in GNU .mo format: Some messages are at a wrong index in the hash table."),
                           filename);
                  if (entry == i + 1)
                    break;

                  /* Advance idx by incr, modulo hash_tab_size.  */
                  if (idx >= header.hash_tab_size - incr)
                    idx -= header.hash_tab_size - incr;
                  else
                    idx += incr;
                }
            }
        }

      /* Convert the static strings to messages.  */
      for (i = 0; i < header.nstrings; i++)
        {
          message_ty *mp;
          char *msgctxt;
          char *msgid;
          size_t msgid_len;
          char *separator;
          char *msgstr;
          size_t msgstr_len;

          /* Read the msgctxt and msgid.  */
          msgid = get_string (&bf, header.orig_tab_offset + i * 8,
                              &msgid_len);
          /* Split into msgctxt and msgid.  */
          separator = strchr (msgid, MSGCTXT_SEPARATOR);
          if (separator != NULL)
            {
              /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
              *separator = '\0';
              msgctxt = msgid;
              msgid = separator + 1;
              msgid_len -= msgid - msgctxt;
            }
          else
            msgctxt = NULL;

          /* Read the msgstr.  */
          msgstr = get_string (&bf, header.trans_tab_offset + i * 8,
                               &msgstr_len);

          /* If more bytes follow the first NUL of msgid, they are passed
             as the third argument; otherwise NULL is passed.  */
          mp = message_alloc (msgctxt,
                              msgid,
                              (strlen (msgid) + 1 < msgid_len
                               ? msgid + strlen (msgid) + 1
                               : NULL),
                              msgstr, msgstr_len,
                              &pos);
          message_list_append (mlp, mp);
        }

      switch (header.revision & 0xffff)
        {
        case 0:
          break;
        case 1:
        default:
          /* Fill the header parts that apply to minor revision >= 1.  */
          header.n_sysdep_segments = GET_HEADER_FIELD (n_sysdep_segments);
          header.sysdep_segments_offset =
            GET_HEADER_FIELD (sysdep_segments_offset);
          header.n_sysdep_strings = GET_HEADER_FIELD (n_sysdep_strings);
          header.orig_sysdep_tab_offset =
            GET_HEADER_FIELD (orig_sysdep_tab_offset);
          header.trans_sysdep_tab_offset =
            GET_HEADER_FIELD (trans_sysdep_tab_offset);

          for (i = 0; i < header.n_sysdep_strings; i++)
            {
              message_ty *mp;
              char *msgctxt;
              char *msgid;
              size_t msgid_len;
              char *separator;
              char *msgstr;
              size_t msgstr_len;
              nls_uint32 offset;
              size_t f;

              /* Read the msgctxt and msgid.  */
              offset = get_uint32 (&bf, header.orig_sysdep_tab_offset + i * 4);
              msgid = get_sysdep_string (&bf, offset, &header, &msgid_len);
              /* Split into msgctxt and msgid.  */
              separator = strchr (msgid, MSGCTXT_SEPARATOR);
              if (separator != NULL)
                {
                  /* The part before the MSGCTXT_SEPARATOR is the msgctxt.  */
                  *separator = '\0';
                  msgctxt = msgid;
                  msgid = separator + 1;
                  msgid_len -= msgid - msgctxt;
                }
              else
                msgctxt = NULL;

              /* Read the msgstr.  */
              offset = get_uint32 (&bf, header.trans_sysdep_tab_offset + i * 4);
              msgstr = get_sysdep_string (&bf, offset, &header, &msgstr_len);

              mp = message_alloc (msgctxt,
                                  msgid,
                                  (strlen (msgid) + 1 < msgid_len
                                   ? msgid + strlen (msgid) + 1
                                   : NULL),
                                  msgstr, msgstr_len,
                                  &pos);

              /* Only messages with c-format or objc-format annotation are
                 recognized as having system-dependent strings by msgfmt.
                 Which one of the two, we don't know.  We have to guess,
                 assuming that c-format is more probable than objc-format and
                 that the .mo was likely produced by "msgfmt -c".  */
              for (f = format_c; ; f = format_objc)
                {
                  bool valid = true;
                  struct formatstring_parser *parser = formatstring_parsers[f];
                  const char *str_end;
                  const char *str;

                  /* Every NUL-separated part of the msgid must parse.  */
                  str_end = msgid + msgid_len;
                  for (str = msgid; str < str_end; str += strlen (str) + 1)
                    {
                      char *invalid_reason = NULL;
                      void *descr =
                        parser->parse (str, false, NULL, &invalid_reason);

                      if (descr != NULL)
                        parser->free (descr);
                      else
                        {
                          free (invalid_reason);
                          valid = false;
                          break;
                        }
                    }
                  if (valid)
                    {
                      /* Likewise for every part of the msgstr.  */
                      str_end = msgstr + msgstr_len;
                      for (str = msgstr; str < str_end; str += strlen (str) + 1)
                        {
                          char *invalid_reason = NULL;
                          void *descr =
                            parser->parse (str, true, NULL, &invalid_reason);

                          if (descr != NULL)
                            parser->free (descr);
                          else
                            {
                              free (invalid_reason);
                              valid = false;
                              break;
                            }
                        }
                    }

                  if (valid)
                    {
                      /* Found the most likely among c-format, objc-format.  */
                      mp->is_format[f] = yes;
                      break;
                    }

                  /* Try next f.  */
                  if (f == format_objc)
                    break;
                }

              message_list_append (mlp, mp);
            }
          break;
        }
      break;

    default:
      goto unrecognised;
    }

  if (fp != stdin)
    fclose (fp);
}