#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''Collections of messages and their translations, called cliques. Also
collections of cliques (uber-cliques).
'''

import re
import types

from grit import constants
from grit import exception
from grit import lazy_re
from grit import pseudo
from grit import pseudo_rtl
from grit import tclib


# The string types recognised as "text parts" of a translation.  Python 2
# exposes (str, unicode) as types.StringTypes; that attribute is absent on
# Python 3, where plain str is used instead.
_STRING_TYPES = getattr(types, 'StringTypes', (str,))


class UberClique(object):
  '''A factory (NOT a singleton factory) for making cliques. It has several
  methods for working with the cliques created using the factory.
  '''

  def __init__(self):
    # A map from message ID to list of cliques whose source messages have
    # that ID. This will contain all cliques created using this factory.
    # Different messages can have the same ID because they have the
    # same translateable portion and placeholder names, but occur in different
    # places in the resource tree.
    #
    # Each list of cliques is kept sorted by description, to achieve
    # stable results from the BestClique method, see below.
    self.cliques_ = {}

    # A map of clique IDs to a {language: 1} dict recording translations
    # where we fell back to English.
    self.fallback_translations_ = {}

    # A map of clique IDs to a {language: 1} dict recording missing
    # translations.
    self.missing_translations_ = {}

  def _AddMissingTranslation(self, lang, clique, is_error):
    '''Records that 'clique' has no translation for 'lang'.

    Args:
      lang: 'fr'
      clique: the MessageClique lacking the translation
      is_error: True to record it as a missing translation, False to record
                it as a fallback to English.
    '''
    if is_error:
      table = self.missing_translations_
    else:
      table = self.fallback_translations_
    # Each language is recorded at most once per clique ID.
    table.setdefault(clique.GetId(), {}).setdefault(lang, 1)

  def HasMissingTranslations(self):
    '''Returns True if any translation was recorded as missing (fallbacks to
    English do not count).
    '''
    return len(self.missing_translations_) > 0

  def MissingTranslationsReport(self):
    '''Returns a string suitable for printing to report missing
    and fallback translations to the user.
    '''
    def ReportTranslation(clique, langs):
      # Accept any iterable of languages (e.g. a dict keys view) by
      # materialising it, so that it can be sliced below.
      langs = list(langs)
      text = clique.GetMessage().GetPresentableContent()
      # The text 'error' (usually 'Error:' but we are conservative)
      # can trigger some build environments (Visual Studio, we're
      # looking at you) to consider invocation of grit to have failed,
      # so we make sure never to output that word.
      extract = re.sub('(?i)error', 'REDACTED', text[0:40])[0:40]
      ellipsis = ''
      if len(text) > 40:
        ellipsis = '...'
      describe_langs = ','.join(langs[0:6])
      if len(langs) > 6:
        describe_langs += " and %d more" % (len(langs) - 6)
      return " %s \"%s%s\" %s" % (clique.GetId(), extract, ellipsis,
                                  describe_langs)

    lines = []
    if self.fallback_translations_:
      lines.append(
          "WARNING: Fell back to English for the following translations:")
      for (id, langs) in self.fallback_translations_.items():
        lines.append(ReportTranslation(self.cliques_[id][0], langs))
    if self.missing_translations_:
      lines.append("ERROR: The following translations are MISSING:")
      for (id, langs) in self.missing_translations_.items():
        lines.append(ReportTranslation(self.cliques_[id][0], langs))
    return '\n'.join(lines)

  def MakeClique(self, message, translateable=True):
    '''Create a new clique initialized with a message.

    Args:
      message: tclib.Message()
      translateable: True | False

    Return:
      The newly created MessageClique, which is also registered with this
      uber-clique under the ID of 'message'.
    '''
    clique = MessageClique(self, message, translateable)
    msg_id = message.GetId()

    # Enable others to find this clique by its message ID
    if msg_id in self.cliques_:
      presentable_text = clique.GetMessage().GetPresentableContent()
      if not message.HasAssignedId():
        # Messages sharing a computed ID must present identically.
        for c in self.cliques_[msg_id]:
          assert c.GetMessage().GetPresentableContent() == presentable_text
      self.cliques_[msg_id].append(clique)
      # We need to keep each list of cliques sorted by description, to
      # achieve stable results from the BestClique method, see below.
      self.cliques_[msg_id].sort(
          key=lambda c: c.GetMessage().GetDescription())
    else:
      self.cliques_[msg_id] = [clique]

    return clique

  def FindCliqueAndAddTranslation(self, translation, language):
    '''Adds the specified translation to the clique with the source message
    it is a translation of.

    Args:
      translation: tclib.Translation()
      language: 'en' | 'fr' ...

    Return:
      True if the source message was found, otherwise false.
    '''
    cliques = self.cliques_.get(translation.GetId())
    if cliques is None:
      return False
    # Every clique sharing this ID receives the translation.
    for clique in cliques:
      clique.AddTranslation(translation, language)
    return True

  def BestClique(self, id):
    '''Returns the "best" clique from a list of cliques. All the cliques
    must have the same ID. The "best" clique is chosen in the following
    order of preference:
    - The first clique that has a non-ID-based description.
    - If no such clique found, the first clique with an ID-based description.
    - Otherwise the first clique.

    This method is stable in terms of always returning a clique with
    an identical description (on different runs of GRIT on the same
    data) because self.cliques_ is sorted by description.

    Raises:
      KeyError if no clique with the given ID exists.
    '''
    clique_list = self.cliques_[id]
    clique_with_id = None
    for clique in clique_list:
      description = clique.GetMessage().GetDescription()
      if not description:
        continue
      if not description.startswith('ID:'):
        # this is the preferred case so we exit right away
        return clique
      if clique_with_id is None:
        clique_with_id = clique

    if clique_with_id is not None:
      return clique_with_id
    if clique_list:
      return clique_list[0]
    return None

  def BestCliquePerId(self):
    '''Iterates over the list of all cliques and returns the best clique for
    each ID. This will be the first clique with a source message that has a
    non-empty description, or an arbitrary clique if none of them has a
    description.
    '''
    for id in self.cliques_:
      yield self.BestClique(id)

  def BestCliqueByOriginalText(self, text, meaning):
    '''Looks through the "best" (as in BestClique()) clique of each ID for one
    whose source message has original text 'text' and meaning 'meaning'.

    Note that what is returned is the matching source message (the result of
    clique.GetMessage()), not the clique object itself.  Returns None if
    there is no such clique.
    '''
    # If needed, this can be optimized by maintaining a map of
    # fingerprints of original text+meaning to cliques.
    for c in self.BestCliquePerId():
      msg = c.GetMessage()
      if msg.GetRealContent() == text and msg.GetMeaning() == meaning:
        return msg
    return None

  def AllMessageIds(self):
    '''Returns a list of all defined message IDs.
    '''
    return list(self.cliques_)

  def AllCliques(self):
    '''Iterates over all cliques. Note that this can return multiple cliques
    with the same ID.
    '''
    for cliques in self.cliques_.values():
      for c in cliques:
        yield c

  def GenerateXtbParserCallback(self, lang, debug=False):
    '''Creates a callback function as required by grit.xtb_reader.Parse().
    This callback will create Translation objects for each message from
    the XTB that exists in this uberclique, and add them as translations for
    the relevant cliques. The callback will add translations to the language
    specified by 'lang'

    Args:
      lang: 'fr'
      debug: True | False
    '''
    def Callback(id, structure):
      if id not in self.cliques_:
        if debug:
          print("Ignoring translation #%s" % id)
        return

      if debug:
        print("Adding translation #%s" % id)

      # We fetch placeholder information from the original message (the XTB file
      # only contains placeholder names).
      original_msg = self.BestClique(id).GetMessage()

      translation = tclib.Translation(id=id)
      for is_ph, text in structure:
        if not is_ph:
          translation.AppendText(text)
          continue
        # Find the original placeholder with this presentation name.
        for ph in original_msg.GetPlaceholders():
          if ph.GetPresentation() == text:
            translation.AppendPlaceholder(tclib.Placeholder(
                ph.GetPresentation(), ph.GetOriginal(), ph.GetExample()))
            break
        else:
          raise exception.MismatchingPlaceholders(
              'Translation for message ID %s had <ph name="%s"/>, no match\n'
              'in original message' % (id, text))
      self.FindCliqueAndAddTranslation(translation, lang)
    return Callback


class CustomType(object):
  '''A base class you should implement if you wish to specify a custom type
  for a message clique (i.e. custom validation and optional modification of
  translations).'''

  def Validate(self, message):
    '''Returns true if the message (a tclib.Message object) is valid,
    otherwise false.
    '''
    raise NotImplementedError()

  def ValidateAndModify(self, lang, translation):
    '''Returns true if the translation (a tclib.Translation object) is valid,
    otherwise false. The language is also passed in. This method may modify
    the translation that is passed in, if it so wishes.
    '''
    raise NotImplementedError()

  def ModifyTextPart(self, lang, text):
    '''If you call ModifyEachTextPart, it will turn around and call this method
    for each text part of the translation. You should return the modified
    version of the text, or just the original text to not change anything.
    '''
    raise NotImplementedError()

  def ModifyEachTextPart(self, lang, translation):
    '''Call this to easily modify one or more of the textual parts of a
    translation. It will call ModifyTextPart for each part of the
    translation.

    The sequence returned by translation.GetContent() is updated in place;
    parts that are not strings are left untouched.
    '''
    contents = translation.GetContent()
    for ix, part in enumerate(contents):
      if isinstance(part, _STRING_TYPES):
        contents[ix] = self.ModifyTextPart(lang, part)


class OneOffCustomType(CustomType):
  '''A very simple custom type that performs the validation expressed by
  the input expression on all languages including the source language.
  The expression can access the variables 'lang', 'msg' and 'text()' where 'lang'
  is the language of 'msg', 'msg' is the message or translation being
  validated and 'text()' returns the real contents of 'msg' (for shorthand).
  '''

  def __init__(self, expression):
    self.expr = expression

  def Validate(self, message):
    '''Validates the source message by evaluating the expression with 'lang'
    set to MessageClique.source_language.
    '''
    return self.ValidateAndModify(MessageClique.source_language, message)

  def ValidateAndModify(self, lang, msg):
    '''Evaluates the expression against 'msg' and returns its result.'''
    def text():
      return msg.GetRealContent()
    # NOTE(review): eval() executes self.expr as arbitrary Python code.  Only
    # construct this type with expressions from trusted input.
    return eval(self.expr, {},
                {'lang' : lang,
                 'text' : text,
                 'msg' : msg,
                 })


class MessageClique(object):
  '''A message along with all of its translations. Also code to bring
  translations together with their original message.'''

  # change this to the language code of Messages you add to cliques_.
  # TODO(joi) Actually change this based on the <grit> node's source language
  source_language = 'en'

  # A constant translation we use when asked for a translation into the
  # special language constants.CONSTANT_LANGUAGE.
  CONSTANT_TRANSLATION = tclib.Translation(text='TTTTTT')

  # A pattern to match messages that are empty or whitespace only.
  WHITESPACE_MESSAGE = lazy_re.compile(u'^\\s*$')

  def __init__(self, uber_clique, message, translateable=True, custom_type=None):
    '''Create a new clique initialized with just a message.

    Note that messages with a body comprised only of whitespace will implicitly
    be marked non-translatable.

    Args:
      uber_clique: Our uber-clique (collection of cliques)
      message: tclib.Message()
      translateable: True | False
      custom_type: instance of clique.CustomType interface

    Raises:
      grit.exception.InvalidMessage if custom_type is given and rejects
      'message'.
    '''
    # Our parent
    self.uber_clique = uber_clique
    # If not translateable, we only store the original message.
    self.translateable = translateable

    # We implicitly mark messages that have a whitespace-only body as
    # non-translateable.
    if MessageClique.WHITESPACE_MESSAGE.match(message.GetRealContent()):
      self.translateable = False

    # A mapping of language identifiers to tclib.BaseMessage and its
    # subclasses (i.e. tclib.Message and tclib.Translation).
    self.clique = { MessageClique.source_language : message }
    # A list of the "shortcut groups" this clique is
    # part of. Within any given shortcut group, no shortcut key (e.g. &J)
    # must appear more than once in each language for all cliques that
    # belong to the group.
    self.shortcut_groups = []
    # An instance of the CustomType interface, or None. If this is set, it will
    # be used to validate the original message and translations thereof, and
    # will also get a chance to modify translations of the message.
    self.SetCustomType(custom_type)

  def GetMessage(self):
    '''Retrieves the tclib.Message that is the source for this clique.'''
    return self.clique[MessageClique.source_language]

  def GetId(self):
    '''Retrieves the message ID of the messages in this clique.'''
    return self.GetMessage().GetId()

  def IsTranslateable(self):
    '''Returns True if translations are stored for this clique.'''
    return self.translateable

  def AddToShortcutGroup(self, group):
    '''Records that this clique belongs to the shortcut group 'group'.'''
    self.shortcut_groups.append(group)

  def SetCustomType(self, custom_type):
    '''Makes this clique use custom_type for validating messages and
    translations, and optionally modifying translations.

    Raises:
      grit.exception.InvalidMessage if custom_type rejects the source message.
    '''
    self.custom_type = custom_type
    if custom_type and not custom_type.Validate(self.GetMessage()):
      raise exception.InvalidMessage(self.GetMessage().GetRealContent())

  def MessageForLanguage(self, lang, pseudo_if_no_match=True, fallback_to_english=False):
    '''Returns the message/translation for the specified language, providing
    a pseudotranslation if there is no available translation and a pseudo-
    translation is requested.

    The translation of any message whatsoever in the special language
    'x_constant' is the message "TTTTTT".

    Args:
      lang: 'en'
      pseudo_if_no_match: True
      fallback_to_english: False

    Return:
      tclib.BaseMessage
    '''
    if not self.translateable:
      return self.GetMessage()

    if lang == constants.CONSTANT_LANGUAGE:
      return self.CONSTANT_TRANSLATION

    if lang in self.clique:
      return self.clique[lang]

    if lang == constants.FAKE_BIDI:
      return pseudo_rtl.PseudoRTLMessage(self.GetMessage())

    if fallback_to_english:
      self.uber_clique._AddMissingTranslation(lang, self, is_error=False)
      return self.GetMessage()

    # If we're not supposed to generate pseudotranslations, we add an error
    # report to a list of errors, then fail at a higher level, so that we
    # get a list of all messages that are missing translations.
    if not pseudo_if_no_match:
      self.uber_clique._AddMissingTranslation(lang, self, is_error=True)

    return pseudo.PseudoMessage(self.GetMessage())

  def AllMessagesThatMatch(self, lang_re, include_pseudo = True):
    '''Returns a map of all messages that match 'lang', including the pseudo
    translation if requested.

    Note that for a clique that is not translateable, a one-element list
    holding the source message is returned instead of a map.

    Args:
      lang_re: re.compile('fr|en')
      include_pseudo: True

    Return:
      { 'en' : tclib.Message,
        'fr' : tclib.Translation,
        pseudo.PSEUDO_LANG : tclib.Translation }
    '''
    if not self.translateable:
      return [self.GetMessage()]

    matches = {}
    for msglang, msg in self.clique.items():
      if lang_re.match(msglang):
        matches[msglang] = msg

    if include_pseudo:
      matches[pseudo.PSEUDO_LANG] = pseudo.PseudoMessage(self.GetMessage())

    return matches

  def AddTranslation(self, translation, language):
    '''Add a translation to this clique. The translation must have the same
    ID as the message that is the source for this clique.

    If this clique is not translateable, the function just returns.

    Args:
      translation: tclib.Translation()
      language: 'en'

    Throws:
      grit.exception.InvalidTranslation if the translation you're trying to add
      doesn't have the same message ID as the source message of this clique.
      AssertionError if the clique already holds a message for 'language', or
      if the number of placeholders in the translation differs from the
      number in the source message.
    '''
    if not self.translateable:
      return
    if translation.GetId() != self.GetId():
      raise exception.InvalidTranslation(
          'Msg ID %s, transl ID %s' % (self.GetId(), translation.GetId()))

    assert language not in self.clique

    # Because two messages can differ in the original content of their
    # placeholders yet share the same ID (because they are otherwise the
    # same), the translation we are getting may have different original
    # content for placeholders than our message, yet it is still the right
    # translation for our message (because it is for the same ID). We must
    # therefore fetch the original content of placeholders from our original
    # English message.
    #
    # See grit.clique_unittest.MessageCliqueUnittest.testSemiIdenticalCliques
    # for a concrete explanation of why this is necessary.

    original = self.MessageForLanguage(self.source_language, False)
    if len(original.GetPlaceholders()) != len(translation.GetPlaceholders()):
      mismatch = ("ERROR: '%s' translation of message id %s does not match" %
                  (language, translation.GetId()))
      print(mismatch)
      # Raised explicitly (rather than 'assert False') so that the check is
      # not stripped when Python runs with -O.
      raise AssertionError(mismatch)

    transl_msg = tclib.Translation(id=self.GetId(),
                                   text=translation.GetPresentableContent(),
                                   placeholders=original.GetPlaceholders())

    # A failed custom validation is only reported; the translation is still
    # stored below.
    if self.custom_type and not self.custom_type.ValidateAndModify(language, transl_msg):
      print("WARNING: %s translation failed validation: %s" % (
          language, transl_msg.GetId()))

    self.clique[language] = transl_msg