#! /usr/bin/env python3
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.  It handles
plural forms and message contexts.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to.  If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.2"

# Maps each message key (bytes; b"<context>\x04<id>" when the entry has a
# context) to its translation (bytes).  Filled by add(), read by generate().
MESSAGES = {}


def usage(code, msg=''):
    """Print the module docstring (and *msg*, if given) to stderr, then exit.

    *code* is passed to sys.exit() as the process exit status.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


def add(ctxt, id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    *ctxt* is the message context (bytes) or None, *id* the message id
    (bytes) and *str* the translation (bytes).  Entries flagged *fuzzy* or
    with an empty translation are ignored.
    """
    if not fuzzy and str:
        if ctxt is None:
            MESSAGES[id] = str
        else:
            # Context and id are joined with an EOT (0x04) byte.
            MESSAGES[b"%b\x04%b" % (ctxt, id)] = str


def generate():
    """Return the binary .mo catalog (bytes) for the collected MESSAGES."""
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    values = [MESSAGES[key] for key in keys]
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the two index tables (8 bytes per entry in
    # each table, hence 16 bytes per message).
    keystart = 7 * 4 + 16 * len(keys)
    # and the values start after the keys (each key is NUL terminated)
    valuestart = keystart + sum(len(key) + 1 for key in keys)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    # Each string is NUL terminated; the NUL does not count into the size.
    kpos = keystart
    vpos = valuestart
    for key, value in zip(keys, values):
        koffsets += [len(key), kpos]
        voffsets += [len(value), vpos]
        kpos += len(key) + 1
        vpos += len(value) + 1
    output = struct.pack("Iiiiiii",
                         0x950412de,            # Magic
                         0,                     # Version
                         len(keys),             # # of entries
                         7 * 4,                 # start of key index
                         7 * 4 + len(keys) * 8,  # start of value index
                         0, 0)                  # size and offset of hash table
    output += array.array("i", koffsets + voffsets).tobytes()
    output += b''.join(key + b'\0' for key in keys)
    output += b''.join(value + b'\0' for value in values)
    return output


def make(filename, outfile):
    """Compile the catalog *filename* (.po) into the binary file *outfile*.

    '.po' is appended to *filename* when it is missing.  When *outfile* is
    None the output name is the input name with its extension replaced by
    '.mo'.  Exits with status 1 if the input cannot be read or is malformed;
    a failure to write the output is only reported on stderr.

    Parsed entries are added to the module-level MESSAGES dictionary, which
    this function does not clear: a caller converting several unrelated
    catalogs must clear MESSAGES between calls.
    """
    # Parser states: which kind of string continuation lines belong to.
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        l = l.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if l.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment separated the entries, so the new entry cannot
                # carry a fuzzy flag; do not inherit the previous one.
                fuzzy = 0
            section = CTXT
            l = l[7:]
            msgctxt = b''
        elif l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
                # The previous entry is finished: its context and fuzzy flag
                # must not leak into the entry that starts here.
                msgctxt = None
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What remains is a quoted string; evaluate its escapes.
        l = ast.literal_eval(l)
        if section == CTXT:
            msgctxt += l.encode(encoding)
        elif section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)


def main():
    """Command-line entry point: parse sys.argv and compile each input file."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        return

    for filename in args:
        if outfile is None:
            # Every input gets its own .mo file, so it must not inherit the
            # messages of the inputs processed before it.  With an explicit
            # output file the inputs are deliberately merged into it.
            MESSAGES.clear()
        make(filename, outfile)


if __name__ == '__main__':
    main()