#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to. If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import sys
import os
import getopt
import struct
import array
import ast

__version__ = "1.1"

# Catalog being built: maps msgid -> msgstr, both byte strings.  Plural
# entries join the singular/plural ids and the individual forms with NUL.
MESSAGES = {}


def usage(code, msg=''):
    """Print the module help, and *msg* if given, to stderr; exit with *code*."""
    sys.stderr.write('%s\n' % (__doc__,))
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)


def add(id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    Entries that are marked fuzzy or have an empty translation are skipped.
    """
    if not fuzzy and str:
        MESSAGES[id] = str


def generate():
    """Return the binary .mo image (a byte string) for the current MESSAGES."""
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    spans = []
    id_parts = []
    str_parts = []
    id_pos = str_pos = 0
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        spans.append((id_pos, len(key), str_pos, len(value)))
        id_parts.append(key)
        str_parts.append(value)
        id_pos += len(key) + 1
        str_pos += len(value) + 1
    ids = b''.join(part + b'\0' for part in id_parts)
    strs = b''.join(part + b'\0' for part in str_parts)

    # The header is 7 32-bit integers, followed by the two index tables
    # (8 bytes per entry each).  We don't use hash tables, so the keys start
    # right after the index tables ...
    keystart = 7 * 4 + 16 * len(keys)
    # ... and the values start after the keys.
    valuestart = keystart + len(ids)

    # The index first lists all keys, then all values.  Each entry has first
    # the size of the string, then its file offset.
    key_index = []
    value_index = []
    for id_off, id_len, str_off, str_len in spans:
        key_index += [id_len, id_off + keystart]
        value_index += [str_len, str_off + valuestart]
    index = key_index + value_index

    header = struct.pack("Iiiiiii",
                         0x950412de,            # Magic
                         0,                     # Version
                         len(keys),             # Number of entries
                         7 * 4,                 # Start of key index
                         7 * 4 + len(keys) * 8, # Start of value index
                         0, 0)                  # Size and offset of hash table
    # Native-order C ints, the same layout array.array("i") produced.
    table = struct.pack('%di' % len(index), *index)
    return header + table + ids + strs


def _display(data):
    """Return byte string *data* in a form that can be written to stderr."""
    if isinstance(data, str):
        return data
    return data.decode('latin-1')


def _fail(text):
    """Write *text* to stderr and exit with status 1."""
    sys.stderr.write('%s\n' % (text,))
    sys.exit(1)


def _po_name(filename):
    """Return *filename* with a '.po' suffix appended unless already there."""
    return filename if filename.endswith('.po') else filename + '.po'


def _unquote(token):
    """Return the byte-string value of the quoted PO string *token*.

    The token is parsed as a Python string literal with ast.literal_eval, so
    unlike eval() no code embedded in the .po file is ever executed.
    Raises SyntaxError or ValueError when *token* is not a string literal.
    """
    # XXX: Does this always follow Python escape semantics?
    if not isinstance(token, str):
        # Python 3: literal_eval needs text.  Latin-1 maps bytes 0-255 onto
        # code points 0-255, so decoding here and encoding below is lossless.
        token = token.decode('latin-1')
    value = ast.literal_eval(token)
    if isinstance(value, bytes):
        return value
    if isinstance(value, str):
        return value.encode('latin-1')
    raise ValueError('not a string literal: %r' % (value,))


def _read_catalog(infile):
    """Parse the .po file *infile* and add() its entries to MESSAGES.

    Prints a message to stderr and exits with status 1 if the file cannot be
    read or is malformed.
    """
    ID = 1
    STR = 2

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        _fail(msg)

    section = None
    fuzzy = 0
    msgid = None        # None until the first msgid line has been seen
    msgstr = b''
    is_plural = False

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        is_comment = l.startswith(b'#')
        # If we get a comment line after a msgstr, this is a new entry
        if is_comment and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if l.startswith(b'#,') and b'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if is_comment:
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith(b'msgid') and not l.startswith(b'msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # The fuzzy mark belonged to the entry just stored; without
                # this reset it would also suppress the entry starting here
                # when no comment line separates the two.
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith(b'msgid_plural'):
            if section != ID:
                _fail('msgid_plural not preceeded by msgid on %s:%d' %
                      (infile, lno))
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith(b'msgstr'):
            if msgid is None:
                _fail('msgstr not preceded by msgid on %s:%d' % (infile, lno))
            section = STR
            if l.startswith(b'msgstr['):
                if not is_plural:
                    _fail('plural without msgid_plural on %s:%d' %
                          (infile, lno))
                l = l.split(b']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    _fail('indexed msgstr required for plural on %s:%d' %
                          (infile, lno))
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        try:
            l = _unquote(l)
        except (SyntaxError, ValueError):
            _fail('Syntax error on %s:%d before:\n%s' %
                  (infile, lno, _display(l)))
        if section == ID:
            msgid += l
        elif section == STR:
            msgstr += l
        else:
            _fail('Syntax error on %s:%d before:\n%s' %
                  (infile, lno, _display(l)))
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)


def _write_catalog(outfile):
    """Write the catalog generated from MESSAGES to *outfile*; exit 1 on failure."""
    output = generate()
    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        _fail(msg)


def make(filename, outfile):
    """Compile one .po file into a binary .mo file.

    filename -- input path; '.po' is appended when it is missing.
    outfile  -- output path, or None to use the input name with '.mo'.

    MESSAGES is emptied first, so the catalog contains only the entries of
    this file even when make() is called several times in one process.
    Prints a message to stderr and exits with status 1 when the input cannot
    be read or parsed, or the output cannot be written.
    """
    # Clear in place (not rebind) so existing references to MESSAGES stay valid.
    MESSAGES.clear()

    # Compute .mo name from .po name and arguments
    infile = _po_name(filename)
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    _read_catalog(infile)
    _write_catalog(outfile)


def main():
    """Command-line entry point: parse sys.argv and compile the given files."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            sys.stderr.write("msgfmt.py %s\n" % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        sys.stderr.write('No input file given\n')
        sys.stderr.write("Try `msgfmt --help' for more information.\n")
        sys.exit(1)

    if outfile is not None and len(args) > 1:
        # Several inputs, one explicit output: combine them into a single
        # catalog, as GNU msgfmt does.
        MESSAGES.clear()
        for filename in args:
            _read_catalog(_po_name(filename))
        _write_catalog(outfile)
    else:
        for filename in args:
            make(filename, outfile)


if __name__ == '__main__':
    main()