#! /usr/bin/env python3
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.  It handles both
plural forms and message contexts.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to.  If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.2"

# Catalog being built: maps the encoded key (msgid, optionally prefixed with
# "<msgctxt>\x04") to the encoded translation.  make() empties it before
# parsing each input file.
MESSAGES = {}


def usage(code, msg=''):
    """Print the module help text (and *msg*, if given) to stderr, then exit.

    *code* is passed to sys.exit() as the process exit status.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


def add(ctxt, id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    *ctxt* is the message context as bytes, or None when the entry has no
    context; *id* and *str* are the message id and its translation as bytes.
    Entries flagged *fuzzy* and entries with an empty translation are ignored.
    """
    if not fuzzy and str:
        if ctxt is None:
            MESSAGES[id] = str
        else:
            # A context is stored in the key, separated from the id by EOT.
            MESSAGES[b"%b\x04%b" % (ctxt, id)] = str


def generate():
    """Return the binary .mo image (bytes) for the current MESSAGES."""
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    offsets = []
    ids = bytearray()
    strs = bytearray()
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((len(ids), len(key), len(strs), len(value)))
        ids += key + b'\0'
        strs += value + b'\0'
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the two index tables (8 bytes per entry in
    # each table, hence 16 bytes per key).
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    header = struct.pack("Iiiiiii",
                         0x950412de,           # Magic
                         0,                    # Version
                         len(keys),            # # of entries
                         7*4,                  # start of key index
                         7*4 + len(keys)*8,    # start of value index
                         0, 0)                 # size and offset of hash table
    index = array.array("i", koffsets + voffsets).tobytes()
    return b''.join((header, index, ids, strs))


def make(filename, outfile):
    """Compile the catalog *filename* and write the result to *outfile*.

    '.po' is appended to *filename* when it is missing.  When *outfile* is
    None the output name is the input name with its extension replaced by
    '.mo'.  Every call starts from an empty catalog, so translations from a
    previously compiled file never end up in this output.

    Exits the process with status 1 when the input cannot be read or when the
    catalog contains a syntax error.  A failure to write the output is
    reported on stderr only.
    """
    # Parser states: which kind of string the current line continues.
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    # Drop whatever an earlier make() call collected; MESSAGES is shared
    # module state and would otherwise leak into this catalog.
    MESSAGES.clear()

    section = msgctxt = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        l = raw.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if l.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment separated the entries, so the fuzzy mark seen
                # so far belonged to the entry just committed.
                fuzzy = 0
            section = CTXT
            l = l[7:]
            msgctxt = b''
        elif l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # The context and fuzzy mark belonged to the entry just
                # committed; the entry starting here has neither.
                msgctxt = None
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What remains is a quoted string literal; evaluate its escapes.
        l = ast.literal_eval(l)
        if section == CTXT:
            msgctxt += l.encode(encoding)
        elif section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)


def main():
    """Command-line entry point: parse the options and compile each input."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        return

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()