#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''The 'grit rc2grd' tool.'''


import os.path
import getopt
import re
import StringIO
import types

import grit.node.empty
from grit.node import include
from grit.node import structure
from grit.node import message

from grit.gather import rc
from grit.gather import tr_html

from grit.tool import interface
from grit.tool import postprocess_interface
from grit.tool import preprocess_interface

from grit import grd_reader
from grit import lazy_re
from grit import tclib
from grit import util


# NOTE: All patterns below are written as raw strings so that backslash
# sequences such as \s, \t and \n reach the regular expression engine
# untouched instead of relying on how Python treats them in string literals.

# Matches files referenced from an .rc file.  Each reference is a single line
# of the form:  ID  TYPE  "filename"
# where a doubled quote ("") inside the filename is an escaped quote.
_FILE_REF = lazy_re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section (DIALOG or DIALOGEX), from the line carrying the
# ID up to the first line consisting only of END.
_DIALOG = lazy_re.compile(
  r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
  re.MULTILINE | re.DOTALL)


# Matches a menu section.  Nothing is required to follow the MENU keyword
# directly, so any keyword that begins with MENU is matched as well.
_MENU = lazy_re.compile(r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)


# Matches a versioninfo section
_VERSIONINFO = lazy_re.compile(
  r'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
  re.MULTILINE | re.DOTALL)


# Matches a stringtable, including any optional attributes that follow the
# STRINGTABLE keyword.  The text between BEGIN and END is captured as 'body'.
_STRING_TABLE = lazy_re.compile(
  (r'^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
   r'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
  re.MULTILINE | re.DOTALL)


# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
_MESSAGE = lazy_re.compile(r'''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


# Matches each line of comment text in a multi-line comment.
_COMMENT_TEXT = lazy_re.compile(r'^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile(r'\A\s*\Z', re.MULTILINE)


# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and
# FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
  r'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?'  # printf up to last char
  r'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'               # printf last char
  r'|\$[1-9][0-9]*)')                                      # FormatMessage


class Rc2Grd(interface.Tool):
  '''A tool for converting .rc files to .grd files. This tool is only for
converting the source (nontranslated) .rc file to a .grd file. For importing
existing translations, use the rc2xtb tool.

Usage: grit [global options] rc2grd [OPTIONS] RCFILE

The tool takes a single argument, which is the path to the .rc file to convert.
It outputs a .grd file with the same name in the same directory as the .rc file.
The .grd file may have one or more TODO comments for things that have to be
cleaned up manually.

OPTIONS may be any of the following:

  -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.

  -h TYPE        Specify the TYPE attribute for HTML structures.
                 Default is 'tr_html'.

  -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.

  -n MATCH       Specify the regular expression to match in comments that will
                 indicate that the resource the comment belongs to is not
                 translateable. Default is 'Not locali(s|z)able'.

  -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
                 any placeholders that otherwise would have had TODO names.
                 This attempts to find an identical message in the GRDFILE
                 and uses that instead of the automatically placeholderized
                 message.

  --pre CLASS    Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PreProcessor, to
                 run on the text of the RC file before conversion occurs.
                 This can be used to support constructs in the RC files
                 that GRIT cannot handle on its own.

  --post CLASS   Specify an optional, fully qualified classname, which
                 has to be a subclass of grit.tool.PostProcessor, to
                 run on the text of the converted RC file.
                 This can be used to alter the content of the RC file
                 based on the conversion that occurred.

For menus, dialogs and version info, the .grd file will refer to the original
.rc file. Once conversion is complete, you can strip the original .rc file
of its string table and all comments as these will be available in the .grd
file.

Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
Therefore, if your .rc file contains sections like this, you should run the
C preprocessor on the .rc file or manually edit it before using this tool.
'''

  def ShortDescription(self):
    '''Returns a one-line description of this tool.'''
    return 'A tool for converting .rc source files to .grd files.'

  def __init__(self):
    '''Sets every option to the default documented in the class docstring.'''
    self.input_encoding = 'cp1252'
    self.html_type = 'tr_html'
    self.html_encoding = 'utf-8'
    self.not_localizable_re = re.compile('Not locali(s|z)able')
    # Parsed "role model" .grd tree (set by -r), or None when not requested.
    self.role_model = None
    # Fully qualified class names given via --pre / --post, or None.
    self.pre_process = None
    self.post_process = None

  def ParseOptions(self, args):
    '''Given a list of arguments, set this object's options and return
    all non-option arguments.

    Args:
      args: list of command-line argument strings for this tool

    Return:
      The list of arguments left over once the options have been consumed.

    Raises:
      getopt.GetoptError: for an unknown option or a missing option argument.
    '''
    # Every short option takes a value, so each letter is followed by ':'.
    # '-r' needs it too: without the colon getopt reports an empty value for
    # '-r' and leaves GRDFILE among the positional arguments.
    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r:', ['pre=', 'post='])
    for (key, val) in own_opts:
      if key == '-e':
        self.input_encoding = val
      elif key == '-h':
        self.html_type = val
      elif key == '-u':
        self.html_encoding = val
      elif key == '-n':
        self.not_localizable_re = re.compile(val)
      elif key == '-r':
        self.role_model = grd_reader.Parse(val)
      elif key == '--pre':
        self.pre_process = val
      elif key == '--post':
        self.post_process = val
    return args

  def Run(self, opts, args):
    '''Converts the .rc file named in 'args' and writes a .grd file next to it.

    Args:
      opts: global options, handed to self.SetOptions() unchanged
      args: tool-specific arguments: the options understood by ParseOptions()
            followed by exactly one path to an .rc file

    Return:
      2 if the number of non-option arguments is not exactly one, otherwise
      None.
    '''
    args = self.ParseOptions(args)
    if len(args) != 1:
      print('This tool takes a single tool-specific argument, the path to the\n'
            '.rc file to process.')
      return 2
    self.SetOptions(opts)

    path = args[0]
    # Same directory and base name as the input, with a .grd extension.
    out_path = os.path.join(util.dirname(path),
                            os.path.splitext(os.path.basename(path))[0] + '.grd')

    rctext = util.ReadFile(path, self.input_encoding)
    grd_text = unicode(self.Process(rctext, path))
    with util.WrapOutputStream(open(out_path, 'w'), 'utf-8') as outfile:
      outfile.write(grd_text)

    print('Wrote output file %s.\nPlease check for TODO items in the file.' %
          out_path)


  def Process(self, rctext, rc_path):
    '''Processes 'rctext' and returns a resource tree corresponding to it.

    Args:
      rctext: complete text of the rc file
      rc_path: path to the rc file, e.g. 'resource/resource.rc'

    Return:
      grit.node.base.Node subclass
    '''

    if self.pre_process:
      preprocess_class = util.NewClassInstance(self.pre_process,
                                               preprocess_interface.PreProcessor)
      if preprocess_class:
        rctext = preprocess_class.Process(rctext, rc_path)
      else:
        self.Out(
          'PreProcessing class could not be found. Skipping preprocessing.\n')

    # Start with a basic skeleton for the .grd file
    root = grd_reader.Parse(StringIO.StringIO(
      '''<?xml version="1.0" encoding="UTF-8"?>
      <grit base_dir="." latest_public_release="0"
          current_release="1" source_lang_id="en">
        <outputs />
        <translations />
        <release seq="1">
          <includes />
          <structures />
          <messages />
        </release>
      </grit>'''), util.dirname(rc_path))
    # The third child of the root is <release>; its children appear in the
    # order they are written in the skeleton above.
    release = root.children[2]
    includes = release.children[0]
    structures = release.children[1]
    messages = release.children[2]
    assert (isinstance(includes, grit.node.empty.IncludesNode) and
            isinstance(structures, grit.node.empty.StructuresNode) and
            isinstance(messages, grit.node.empty.MessagesNode))

    self.AddIncludes(rctext, includes)
    self.AddStructures(rctext, structures, os.path.basename(rc_path))
    self.AddMessages(rctext, messages)

    self.VerboseOut('Validating that all IDs are unique...\n')
    root.ValidateUniqueIds()
    self.ExtraVerboseOut('Done validating that all IDs are unique.\n')

    if self.post_process:
      postprocess_class = util.NewClassInstance(self.post_process,
                                                postprocess_interface.PostProcessor)
      if postprocess_class:
        root = postprocess_class.Process(rctext, rc_path, root)
      else:
        self.Out(
          'PostProcessing class could not be found. Skipping postprocessing.\n')

    return root


  def IsHtml(self, res_type, fname):
    '''Check whether both the type and file extension indicate HTML'''
    fext = fname.split('.')[-1].lower()
    return res_type == 'HTML' and fext in ('htm', 'html')


  def AddIncludes(self, rctext, node):
    '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
    adds each included resource as an <include> child node of 'node'.

    HTML resources are skipped here; AddStructures() adds them as
    <structure> nodes instead.
    '''
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      assert '\n' not in fname
      if not self.IsHtml(res_type, fname):
        self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
                        (res_type, res_id, fname))
        node.AddChild(
          include.IncludeNode.Construct(node, res_id, res_type, fname))


  def AddStructures(self, rctext, node, rc_filename):
    '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
    information resources and HTML templates) and adds each as a <structure>
    child of 'node'.

    Args:
      rctext: complete text of the rc file
      node: the node to which the <structure> children are added
      rc_filename: file name recorded on the menu, dialog and version
                   structures so that they refer back to the .rc file
    '''
    # First add HTML includes
    for m in _FILE_REF.finditer(rctext):
      res_id = m.group('id')
      res_type = m.group('type').upper()
      fname = rc.Section.UnEscape(m.group('file'))
      # Pass the resource type parsed from the file.  Passing the builtin
      # 'type' here would make the check always fail, so HTML resources
      # would end up in neither <includes> nor <structures>.
      if self.IsHtml(res_type, fname):
        node.AddChild(structure.StructureNode.Construct(
          node, res_id, self.html_type, fname, self.html_encoding))

    # Then add all RC includes
    def AddRcStructure(structure_type, res_id):
      self.VerboseOut('Processing %s with ID %s\n' % (structure_type, res_id))
      node.AddChild(structure.StructureNode.Construct(
        node, res_id, structure_type, rc_filename,
        encoding=self.input_encoding))

    for m in _MENU.finditer(rctext):
      AddRcStructure('menu', m.group('id'))
    for m in _DIALOG.finditer(rctext):
      AddRcStructure('dialog', m.group('id'))
    for m in _VERSIONINFO.finditer(rctext):
      AddRcStructure('version', m.group('id'))


  def AddMessages(self, rctext, node):
    '''Scans 'rctext' for all messages in string tables, preprocesses them as
    much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
    type format specifiers get those specifiers replaced with placeholders, and
    HTML-formatted messages get run through the HTML-placeholderizer). Adds
    each message as a <message> node child of 'node'.'''
    for tm in _STRING_TABLE.finditer(rctext):
      table = tm.group('body')
      for mm in _MESSAGE.finditer(table):
        # Join the comment lines preceding the message into one description.
        comment_text = ' '.join(
          cm.group('text')
          for cm in _COMMENT_TEXT.finditer(mm.group('comment')))

        msg_id = mm.group('id')
        text = rc.Section.UnEscape(mm.group('text'))

        self.VerboseOut('Processing message %s (text: "%s")\n' % (msg_id, text))

        msg_obj = self.Placeholderize(text)

        # Messages that contain only placeholders do not need translation.
        is_translateable = any(
          isinstance(item, types.StringTypes) and
          not _WHITESPACE_ONLY.match(item)
          for item in msg_obj.GetContent())

        if self.not_localizable_re.search(comment_text):
          is_translateable = False

        message_meaning = ''
        internal_comment = ''

        # If we have a "role model" (existing GRD file) and this node exists
        # in the role model, use the description, meaning and translateable
        # attributes from the role model.
        if self.role_model:
          role_node = self.role_model.GetNodeById(msg_id)
          if role_node:
            is_translateable = role_node.IsTranslateable()
            message_meaning = role_node.attrs['meaning']
            comment_text = role_node.attrs['desc']
            internal_comment = role_node.attrs['internal_comment']

        # For nontranslateable messages, we don't want the complexity of
        # placeholderizing everything.
        if not is_translateable:
          msg_obj = tclib.Message(text=text)

        msg_node = message.MessageNode.Construct(node, msg_obj, msg_id,
                                                 desc=comment_text,
                                                 translateable=is_translateable,
                                                 meaning=message_meaning)
        msg_node.attrs['internal_comment'] = internal_comment

        node.AddChild(msg_node)
        self.ExtraVerboseOut('Done processing message %s\n' % msg_id)


  def Placeholderize(self, text):
    '''Creates a tclib.Message object from 'text', attempting to recognize
    a few different formats of text that can be automatically placeholderized
    (HTML code, printf-style format strings, and FormatMessage-style format
    strings).

    Args:
      text: the unescaped message text

    Return:
      The message produced by HTML placeholderizing if it contains at least
      one placeholder.  Otherwise a message in which every format specifier
      is a placeholder named 'TODO_0001', 'TODO_0002', ... or, when a role
      model is in use and has a match for it, the role model's message.
    '''

    # The bare 'except' is deliberate: it only reports which message was being
    # processed and then re-raises, so no exception is swallowed.
    try:
      # First try HTML placeholderizing.
      # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
      msg = tr_html.HtmlToMessage(text, True)
      for item in msg.GetContent():
        if not isinstance(item, types.StringTypes):
          return msg  # Contained at least one placeholder, so we're done

      # HTML placeholderization didn't do anything, so try to find printf or
      # FormatMessage format specifiers and change them into placeholders.
      msg = tclib.Message()
      # The pattern has one capturing group, so split() returns the format
      # specifiers interleaved with the text around them.
      parts = _FORMAT_SPECIFIER.split(text)
      todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
      for part in parts:
        if _FORMAT_SPECIFIER.match(part):
          msg.AppendPlaceholder(tclib.Placeholder(
            'TODO_%04d' % todo_counter, part, 'TODO'))
          todo_counter += 1
        elif part != '':
          msg.AppendText(part)

      if self.role_model and len(parts) > 1:  # there are TODO placeholders
        role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
          msg.GetRealContent(), '')
        if role_model_msg:
          # replace wholesale to get placeholder names and examples
          msg = role_model_msg

      return msg
    except:
      print('Exception processing message with text "%s"' % text)
      raise