• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #! /usr/bin/env python3
2 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
3 
4 """Generate binary message catalog from textual translation description.
5 
6 This program converts a textual Uniforum-style message catalog (.po file) into
7 a binary GNU catalog (.mo file).  This is essentially the same function as the
8 GNU msgfmt program, however, it is a simpler implementation.  It handles
9 both plural forms and message contexts.
10 
11 Usage: msgfmt.py [OPTIONS] filename.po
12 
13 Options:
14     -o file
15     --output-file=file
16         Specify the output file to write to.  If omitted, output will go to a
17         file named filename.mo (based off the input file name).
18 
19     -h
20     --help
21         Print this message and exit.
22 
23     -V
24     --version
25         Display version information and exit.
26 """
27 
28 import os
29 import sys
30 import ast
31 import getopt
32 import struct
33 import array
34 from email.parser import HeaderParser
35 
# Version string printed by the -V/--version command-line option.
__version__ = "1.2"

# Catalog under construction: maps the msgid bytes (prefixed with the
# msgctxt bytes and a \x04 byte when a context is present) to the msgstr
# bytes.  Filled by add() and serialized by generate().
MESSAGES = {}
39 
40 
def usage(code, msg=''):
    """Print the module help text (and *msg*, if any) to stderr, then exit.

    code -- exit status handed to sys.exit().
    msg  -- optional extra message printed after the help text; skipped
            when it is falsy.
    """
    texts = [__doc__]
    if msg:
        texts.append(msg)
    for text in texts:
        print(text, file=sys.stderr)
    sys.exit(code)
46 
47 
def add(ctxt, id, str, fuzzy):
    """Store one translation in MESSAGES unless it is fuzzy or empty.

    ctxt  -- message context as bytes, or None when the entry has none.
    id    -- message id as bytes.
    str   -- translated message as bytes; an empty value is not stored.
    fuzzy -- true when the entry is marked fuzzy; such entries are skipped.
    """
    if fuzzy or not str:
        return
    # With a context, the key is the context and the id joined by a \x04 byte.
    key = id if ctxt is None else b"%b\x04%b" % (ctxt, id)
    MESSAGES[key] = str
56 
57 
def generate():
    """Return the binary .mo catalog built from MESSAGES, as bytes.

    Layout of the returned data:
      * a header of seven 32-bit integers: magic number, format version,
        number of entries, offset of the key index, offset of the value
        index, and the size and offset of the hash table (both 0, since no
        hash table is written);
      * the key index, then the value index: one (length, file offset) pair
        of 32-bit integers per entry;
      * all keys, then all values, each terminated by a NUL byte.

    Integers are written in the machine's native byte order.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    count = len(keys)

    # Each string is NUL terminated; the NUL does not count into the size.
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(MESSAGES[key] + b'\0' for key in keys)

    # The header is 7 32-bit integers.  We don't use hash tables, so the keys
    # start right after the two index tables (8 bytes per entry in each).
    keystart = 7*4 + 16*count
    # and the values start after the keys
    valuestart = keystart + len(ids)

    # The index first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    kpos = keystart
    vpos = valuestart
    for key in keys:
        value = MESSAGES[key]
        koffsets += [len(key), kpos]
        voffsets += [len(value), vpos]
        kpos += len(key) + 1
        vpos += len(value) + 1
    index = koffsets + voffsets

    # "=" selects native byte order with standard sizes, so every field is
    # exactly 4 bytes regardless of the platform's C int width.
    header = struct.pack("=I6i",
                         0x950412de,        # Magic
                         0,                 # Version
                         count,             # # of entries
                         7*4,               # start of key index
                         7*4 + count*8,     # start of value index
                         0, 0)              # size and offset of hash table
    return header + struct.pack("=%di" % len(index), *index) + ids + strs
97 
98 
def make(filename, outfile):
    """Compile the .po catalog *filename* into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended when the name
                does not already end with it.
    outfile  -- path of the .mo file to write, or None to derive it from the
                input name by replacing the extension with '.mo'.

    Non-fuzzy, non-empty entries are collected through add() into the
    module-level MESSAGES dictionary and serialized with generate().
    MESSAGES is not cleared here, so entries added by earlier calls in the
    same process are written out again.

    The process exits with status 1 when the input cannot be read or when a
    structural error is detected in the catalog.  A failure to write the
    output file is only reported on stderr.
    """
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    fuzzy = 0
    # msgid stays None until the first msgid line has been seen, so that a
    # stray msgstr can be reported instead of failing on an unbound name.
    msgid = msgstr = None
    is_plural = False

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        l = l.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if l.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment line separated the two entries, so the fuzzy
                # mark seen so far belongs to the entry just flushed.
                fuzzy = 0
            section = CTXT
            l = l[7:]
            msgctxt = b''
        elif l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # Coming straight from a msgstr means the new entry has
                # neither a msgctxt nor a flag comment of its own; do not
                # let the previous entry's context or fuzzy mark leak in.
                msgctxt = None
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0' # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            if msgid is None:
                print('msgstr not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0' # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on  %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What is left is a quoted string; it is parsed as a Python literal
        # (literal_eval only accepts literals, it does not run code).
        l = ast.literal_eval(l)
        if section == CTXT:
            msgctxt += l.encode(encoding)
        elif section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile,"wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)
217 
def main():
    """Command-line entry point: parse sys.argv and compile each input file.

    Recognized options are -h/--help, -V/--version and -o/--output-file.
    Every remaining argument is passed to make() together with the output
    file name (None when -o was not given).

    Exits with status 1 on an invalid option or when no input file is given,
    and with status 0 after printing the help text or the version.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        # A missing input file is an error: report it through the exit
        # status as well, like the invalid-option path above does.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)
243 
244 
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
247