#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to. If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array

__version__ = "1.1"

# Catalog being built: maps msgid -> msgstr.  Both are byte strings (``str``
# on Python 2, ``bytes`` on Python 3).  Plural entries hold their forms joined
# by NUL bytes.  The dict is always mutated in place, never rebound, so
# references obtained by importers stay valid.
MESSAGES = {}


def _warn(text):
    """Write *text* followed by a newline to standard error."""
    sys.stderr.write('%s\n' % (text,))


def _fail(text):
    """Report *text* on standard error and exit with status 1."""
    _warn(text)
    sys.exit(1)


def _unquote(literal):
    """Evaluate one quoted PO string literal and return its raw bytes.

    *literal* is a byte string such as ``b'"Hello\\n"'``.  On Python 3 the
    bytes take a latin-1 round trip so that every input byte maps to exactly
    one character and back; the result is then byte-for-byte what Python 2
    produces when it evaluates the byte string directly.
    """
    if sys.version_info[0] < 3:
        return ast.literal_eval(literal)
    return ast.literal_eval(literal.decode('latin-1')).encode('latin-1')


def usage(code, msg=''):
    """Print the module help text to stderr and exit.

    code -- process exit status passed to sys.exit().
    msg  -- optional extra message printed after the help text.
    """
    _warn(__doc__)
    if msg:
        _warn(msg)
    sys.exit(code)


def add(id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    id    -- the msgid (byte string).
    str   -- the msgstr (byte string); empty translations are ignored.
    fuzzy -- true if the entry was marked fuzzy, in which case it is skipped.

    The parameter names shadow builtins but are kept unchanged so existing
    callers keep working.
    """
    if not fuzzy and str:
        MESSAGES[id] = str


def generate():
    """Return the generated output.

    The result is the complete binary .mo image (a byte string) for the
    current contents of MESSAGES: a 7-word header, the key index, the value
    index, then the NUL-terminated keys followed by the NUL-terminated
    values.  Integers are packed in native byte order.
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)

    # For each string, we need size and file offset.  Each string is NUL
    # terminated; the NUL does not count into the size.
    offsets = []
    id_offset = str_offset = 0
    for key in keys:
        value = MESSAGES[key]
        offsets.append((id_offset, len(key), str_offset, len(value)))
        id_offset += len(key) + 1
        str_offset += len(value) + 1
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(MESSAGES[key] + b'\0' for key in keys)

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables (two 8-byte index entries
    # per message).
    keystart = 7 * 4 + 16 * len(keys)
    # ... and the values start after the keys.
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    index = array.array("i", koffsets + voffsets)
    # array.tostring() became tobytes() in Python 3.2 and was removed in 3.9.
    serialize = getattr(index, "tobytes", None) or index.tostring

    header = struct.pack("Iiiiiii",
                         0x950412de,              # Magic
                         0,                       # Version
                         len(keys),               # # of entries
                         7 * 4,                   # start of key index
                         7 * 4 + len(keys) * 8,   # start of value index
                         0, 0)                    # size and offset of hash table
    return header + serialize() + ids + strs


def make(filename, outfile):
    """Compile one .po catalog into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended when missing.
    outfile  -- path of the output file, or None to use the input name with
                its extension replaced by '.mo'.

    Each call starts from an empty MESSAGES dictionary, so entries from a
    previously compiled file never leak into this one.  An unreadable input
    file or a structural error in the catalog is reported on stderr and the
    process exits with status 1.  A failure to write the output is reported
    on stderr, but the function then returns normally.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    # Read as bytes: the catalog is treated as opaque bytes end to end, which
    # behaves the same on Python 2 and Python 3.
    try:
        with open(infile, 'rb') as po_file:
            lines = po_file.readlines()
    except IOError as msg:
        _fail(msg)

    MESSAGES.clear()
    section = None
    fuzzy = 0
    # Pre-bind the per-entry state so that a catalog which starts with a
    # msgstr line cannot trigger an UnboundLocalError.
    msgid = msgstr = b''
    is_plural = False

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        is_comment = l.startswith(b'#')
        # If we get a comment line after a msgstr, this is a new entry
        if is_comment and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if l.startswith(b'#,') and b'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if is_comment:
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith(b'msgid') and not l.startswith(b'msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # No comment separated the two entries, so the mark seen so
                # far belonged to the entry just flushed, not to this one.
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith(b'msgid_plural'):
            if section != ID:
                _fail('msgid_plural not preceded by msgid on %s:%d' %
                      (infile, lno))
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith(b'msgstr'):
            section = STR
            if l.startswith(b'msgstr['):
                if not is_plural:
                    _fail('plural without msgid_plural on %s:%d' %
                          (infile, lno))
                l = l.split(b']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    _fail('indexed msgstr required for plural on %s:%d' %
                          (infile, lno))
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        l = _unquote(l)
        if section == ID:
            msgid += l
        elif section == STR:
            msgstr += l
        else:
            _warn('Syntax error on %s:%d before:' % (infile, lno))
            # Show the offending text as text rather than as a bytes repr.
            _fail(l if isinstance(l, str) else l.decode('latin-1'))
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as mo_file:
            mo_file.write(output)
    except IOError as msg:
        _warn(msg)


def main():
    """Command-line entry point: parse options and compile each input file.

    When -o/--output-file is given together with several inputs, every input
    is compiled to that same path in turn.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            _warn("msgfmt.py %s" % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        _warn('No input file given')
        _warn("Try `msgfmt --help' for more information.")
        return

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()