• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''The 'grit rc2grd' tool.'''
7
8
9import os.path
10import getopt
11import re
12import StringIO
13import types
14
15import grit.node.empty
16from grit.node import include
17from grit.node import structure
18from grit.node import message
19
20from grit.gather import rc
21from grit.gather import tr_html
22
23from grit.tool import interface
24from grit.tool import postprocess_interface
25from grit.tool import preprocess_interface
26
27from grit import grd_reader
28from grit import lazy_re
29from grit import tclib
30from grit import util
31
32
# NOTE: all patterns below are raw strings so that regex escapes such as \s,
# \A or \$ reach the regex engine verbatim instead of relying on Python
# leaving unrecognised string escapes untouched.

# Matches files referenced from an .rc file.  Groups: 'id' (resource ID),
# 'type' (resource type keyword) and 'file' (the text between the outer
# double quotes, where "" stands for an embedded quote).
_FILE_REF = lazy_re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section (DIALOG or DIALOGEX), from the line carrying the ID
# through the first line consisting of END.
_DIALOG = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a menu section
_MENU = lazy_re.compile(r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)


# Matches a versioninfo section
_VERSIONINFO = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)


# Matches a stringtable; the 'body' group is everything between the BEGIN and
# END lines.
_STRING_TABLE = lazy_re.compile(
    (r'^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
     r'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
    re.MULTILINE | re.DOTALL)


# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.
_MESSAGE = lazy_re.compile(r'''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)


# Matches each line of comment text in a multi-line comment.
_COMMENT_TEXT = lazy_re.compile(r'^\s*//\s*(?P<text>.+?)$', re.MULTILINE)


# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile(r'\A\s*\Z', re.MULTILINE)


# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
  r'(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
  r'(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'              # printf last char
  r'|\$[1-9][0-9]*)')                                     # FormatMessage
92
93
94class Rc2Grd(interface.Tool):
95  '''A tool for converting .rc files to .grd files.  This tool is only for
96converting the source (nontranslated) .rc file to a .grd file.  For importing
97existing translations, use the rc2xtb tool.
98
99Usage:  grit [global options] rc2grd [OPTIONS] RCFILE
100
101The tool takes a single argument, which is the path to the .rc file to convert.
102It outputs a .grd file with the same name in the same directory as the .rc file.
103The .grd file may have one or more TODO comments for things that have to be
104cleaned up manually.
105
106OPTIONS may be any of the following:
107
108  -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.
109
110  -h TYPE        Specify the TYPE attribute for HTML structures.
111                 Default is 'tr_html'.
112
113  -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.
114
115  -n MATCH       Specify the regular expression to match in comments that will
116                 indicate that the resource the comment belongs to is not
117                 translateable. Default is 'Not locali(s|z)able'.
118
119  -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
120                 any placeholders that otherwise would have had TODO names.
121                 This attempts to find an identical message in the GRDFILE
122                 and uses that instead of the automatically placeholderized
123                 message.
124
125  --pre CLASS    Specify an optional, fully qualified classname, which
126                 has to be a subclass of grit.tool.PreProcessor, to
127                 run on the text of the RC file before conversion occurs.
128                 This can be used to support constructs in the RC files
129                 that GRIT cannot handle on its own.
130
131  --post CLASS   Specify an optional, fully qualified classname, which
132                 has to be a subclass of grit.tool.PostProcessor, to
133                 run on the text of the converted RC file.
134                 This can be used to alter the content of the RC file
                 based on the conversion that occurred.
136
137For menus, dialogs and version info, the .grd file will refer to the original
138.rc file.  Once conversion is complete, you can strip the original .rc file
139of its string table and all comments as these will be available in the .grd
140file.
141
142Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
144Therefore, if your .rc file contains sections like this, you should run the
145C preprocessor on the .rc file or manually edit it before using this tool.
146'''
147
148  def ShortDescription(self):
149    return 'A tool for converting .rc source files to .grd files.'
150
151  def __init__(self):
152    self.input_encoding = 'cp1252'
153    self.html_type = 'tr_html'
154    self.html_encoding = 'utf-8'
155    self.not_localizable_re = re.compile('Not locali(s|z)able')
156    self.role_model = None
157    self.pre_process = None
158    self.post_process = None
159
160  def ParseOptions(self, args):
161    '''Given a list of arguments, set this object's options and return
162    all non-option arguments.
163    '''
164    (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post='])
165    for (key, val) in own_opts:
166      if key == '-e':
167        self.input_encoding = val
168      elif key == '-h':
169        self.html_type = val
170      elif key == '-u':
171        self.html_encoding = val
172      elif key == '-n':
173        self.not_localizable_re = re.compile(val)
174      elif key == '-r':
175        self.role_model = grd_reader.Parse(val)
176      elif key == '--pre':
177        self.pre_process = val
178      elif key == '--post':
179        self.post_process = val
180    return args
181
182  def Run(self, opts, args):
183    args = self.ParseOptions(args)
184    if len(args) != 1:
185      print ('This tool takes a single tool-specific argument, the path to the\n'
186             '.rc file to process.')
187      return 2
188    self.SetOptions(opts)
189
190    path = args[0]
191    out_path = os.path.join(util.dirname(path),
192                os.path.splitext(os.path.basename(path))[0] + '.grd')
193
194    rctext = util.ReadFile(path, self.input_encoding)
195    grd_text = unicode(self.Process(rctext, path))
196    with util.WrapOutputStream(file(out_path, 'w'), 'utf-8') as outfile:
197      outfile.write(grd_text)
198
199    print 'Wrote output file %s.\nPlease check for TODO items in the file.' % out_path
200
201
202  def Process(self, rctext, rc_path):
203    '''Processes 'rctext' and returns a resource tree corresponding to it.
204
205    Args:
206      rctext: complete text of the rc file
207      rc_path: 'resource\resource.rc'
208
209    Return:
210      grit.node.base.Node subclass
211    '''
212
213    if self.pre_process:
214      preprocess_class = util.NewClassInstance(self.pre_process,
215                                               preprocess_interface.PreProcessor)
216      if preprocess_class:
217        rctext = preprocess_class.Process(rctext, rc_path)
218      else:
219        self.Out(
220          'PreProcessing class could not be found. Skipping preprocessing.\n')
221
222    # Start with a basic skeleton for the .grd file
223    root = grd_reader.Parse(StringIO.StringIO(
224      '''<?xml version="1.0" encoding="UTF-8"?>
225      <grit base_dir="." latest_public_release="0"
226          current_release="1" source_lang_id="en">
227        <outputs />
228        <translations />
229        <release seq="1">
230          <includes />
231          <structures />
232          <messages />
233        </release>
234      </grit>'''), util.dirname(rc_path))
235    includes = root.children[2].children[0]
236    structures = root.children[2].children[1]
237    messages = root.children[2].children[2]
238    assert (isinstance(includes, grit.node.empty.IncludesNode) and
239            isinstance(structures, grit.node.empty.StructuresNode) and
240            isinstance(messages, grit.node.empty.MessagesNode))
241
242    self.AddIncludes(rctext, includes)
243    self.AddStructures(rctext, structures, os.path.basename(rc_path))
244    self.AddMessages(rctext, messages)
245
246    self.VerboseOut('Validating that all IDs are unique...\n')
247    root.ValidateUniqueIds()
248    self.ExtraVerboseOut('Done validating that all IDs are unique.\n')
249
250    if self.post_process:
251      postprocess_class = util.NewClassInstance(self.post_process,
252                                                postprocess_interface.PostProcessor)
253      if postprocess_class:
254        root = postprocess_class.Process(rctext, rc_path, root)
255      else:
256        self.Out(
257          'PostProcessing class could not be found. Skipping postprocessing.\n')
258
259    return root
260
261
262  def IsHtml(self, res_type, fname):
263    '''Check whether both the type and file extension indicate HTML'''
264    fext = fname.split('.')[-1].lower()
265    return res_type == 'HTML' and fext in ('htm', 'html')
266
267
268  def AddIncludes(self, rctext, node):
269    '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
270    adds each included resource as an <include> child node of 'node'.'''
271    for m in _FILE_REF.finditer(rctext):
272      id = m.group('id')
273      res_type = m.group('type').upper()
274      fname = rc.Section.UnEscape(m.group('file'))
275      assert fname.find('\n') == -1
276      if not self.IsHtml(res_type, fname):
277        self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
278                        (res_type, id, fname))
279        node.AddChild(include.IncludeNode.Construct(node, id, res_type, fname))
280
281
282  def AddStructures(self, rctext, node, rc_filename):
283    '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
284    information resources and HTML templates) and adds each as a <structure>
285    child of 'node'.'''
286    # First add HTML includes
287    for m in _FILE_REF.finditer(rctext):
288      id = m.group('id')
289      res_type = m.group('type').upper()
290      fname = rc.Section.UnEscape(m.group('file'))
291      if self.IsHtml(type, fname):
292        node.AddChild(structure.StructureNode.Construct(
293          node, id, self.html_type, fname, self.html_encoding))
294
295    # Then add all RC includes
296    def AddStructure(res_type, id):
297      self.VerboseOut('Processing %s with ID %s\n' % (res_type, id))
298      node.AddChild(structure.StructureNode.Construct(node, id, res_type,
299                                                      rc_filename,
300                                                      encoding=self.input_encoding))
301    for m in _MENU.finditer(rctext):
302      AddStructure('menu', m.group('id'))
303    for m in _DIALOG.finditer(rctext):
304      AddStructure('dialog', m.group('id'))
305    for m in _VERSIONINFO.finditer(rctext):
306      AddStructure('version', m.group('id'))
307
308
309  def AddMessages(self, rctext, node):
310    '''Scans 'rctext' for all messages in string tables, preprocesses them as
311    much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
312    type format specifiers get those specifiers replaced with placeholders, and
313    HTML-formatted messages get run through the HTML-placeholderizer).  Adds
314    each message as a <message> node child of 'node'.'''
315    for tm in _STRING_TABLE.finditer(rctext):
316      table = tm.group('body')
317      for mm in _MESSAGE.finditer(table):
318        comment_block = mm.group('comment')
319        comment_text = []
320        for cm in _COMMENT_TEXT.finditer(comment_block):
321          comment_text.append(cm.group('text'))
322        comment_text = ' '.join(comment_text)
323
324        id = mm.group('id')
325        text = rc.Section.UnEscape(mm.group('text'))
326
327        self.VerboseOut('Processing message %s (text: "%s")\n' % (id, text))
328
329        msg_obj = self.Placeholderize(text)
330
331        # Messages that contain only placeholders do not need translation.
332        is_translateable = False
333        for item in msg_obj.GetContent():
334          if isinstance(item, types.StringTypes):
335            if not _WHITESPACE_ONLY.match(item):
336              is_translateable = True
337
338        if self.not_localizable_re.search(comment_text):
339          is_translateable = False
340
341        message_meaning = ''
342        internal_comment = ''
343
344        # If we have a "role model" (existing GRD file) and this node exists
345        # in the role model, use the description, meaning and translateable
346        # attributes from the role model.
347        if self.role_model:
348          role_node = self.role_model.GetNodeById(id)
349          if role_node:
350            is_translateable = role_node.IsTranslateable()
351            message_meaning = role_node.attrs['meaning']
352            comment_text = role_node.attrs['desc']
353            internal_comment = role_node.attrs['internal_comment']
354
355        # For nontranslateable messages, we don't want the complexity of
356        # placeholderizing everything.
357        if not is_translateable:
358          msg_obj = tclib.Message(text=text)
359
360        msg_node = message.MessageNode.Construct(node, msg_obj, id,
361                                                 desc=comment_text,
362                                                 translateable=is_translateable,
363                                                 meaning=message_meaning)
364        msg_node.attrs['internal_comment'] = internal_comment
365
366        node.AddChild(msg_node)
367        self.ExtraVerboseOut('Done processing message %s\n' % id)
368
369
370  def Placeholderize(self, text):
371    '''Creates a tclib.Message object from 'text', attempting to recognize
372    a few different formats of text that can be automatically placeholderized
373    (HTML code, printf-style format strings, and FormatMessage-style format
374    strings).
375    '''
376
377    try:
378      # First try HTML placeholderizing.
379      # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
380      msg = tr_html.HtmlToMessage(text, True)
381      for item in msg.GetContent():
382        if not isinstance(item, types.StringTypes):
383          return msg  # Contained at least one placeholder, so we're done
384
385      # HTML placeholderization didn't do anything, so try to find printf or
386      # FormatMessage format specifiers and change them into placeholders.
387      msg = tclib.Message()
388      parts = _FORMAT_SPECIFIER.split(text)
389      todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
390      for part in parts:
391        if _FORMAT_SPECIFIER.match(part):
392          msg.AppendPlaceholder(tclib.Placeholder(
393            'TODO_%04d' % todo_counter, part, 'TODO'))
394          todo_counter += 1
395        elif part != '':
396          msg.AppendText(part)
397
398      if self.role_model and len(parts) > 1:  # there are TODO placeholders
399        role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
400          msg.GetRealContent(), '')
401        if role_model_msg:
402          # replace wholesale to get placeholder names and examples
403          msg = role_model_msg
404
405      return msg
406    except:
407      print 'Exception processing message with text "%s"' % text
408      raise
409
410