1#!/usr/bin/env python 2# Copyright (c) 2012 The Chromium Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6'''Pseudotranslation support. Our pseudotranslations are based on the 7P-language, which is a simple vowel-extending language. Examples of P: 8 - "hello" becomes "hepellopo" 9 - "howdie" becomes "hopowdiepie" 10 - "because" becomes "bepecaupause" (but in our implementation we don't 11 handle the silent e at the end so it actually would return "bepecaupausepe" 12 13The P-language has the excellent quality of increasing the length of text 14by around 30-50% which is great for pseudotranslations, to stress test any 15GUI layouts etc. 16 17To make the pseudotranslations more obviously "not a translation" and to make 18them exercise any code that deals with encodings, we also transform all English 19vowels into equivalent vowels with diacriticals on them (rings, acutes, 20diaresis, and circumflex), and we write the "p" in the P-language as a Hebrew 21character Qof. It looks sort of like a latin character "p" but it is outside 22the latin-1 character set which will stress character encoding bugs. 23''' 24 25from grit import lazy_re 26from grit import tclib 27 28 29# An RFC language code for the P pseudolanguage. 30PSEUDO_LANG = 'x-P-pseudo' 31 32# Hebrew character Qof. It looks kind of like a 'p' but is outside 33# the latin-1 character set which is good for our purposes. 34# TODO(joi) For now using P instead of Qof, because of some bugs it used. Find 35# a better solution, i.e. one that introduces a non-latin1 character into the 36# pseudotranslation. 37#_QOF = u'\u05e7' 38_QOF = u'P' 39 40# How we map each vowel. 
_VOWELS = {
  u'a' : u'\u00e5',  # a with ring
  u'e' : u'\u00e9',  # e acute
  u'i' : u'\u00ef',  # i diaeresis
  u'o' : u'\u00f4',  # o circumflex
  u'u' : u'\u00fc',  # u diaeresis
  u'y' : u'\u00fd',  # y acute
  u'A' : u'\u00c5',  # A with ring
  u'E' : u'\u00c9',  # E acute
  u'I' : u'\u00cf',  # I diaeresis
  u'O' : u'\u00d4',  # O circumflex
  u'U' : u'\u00dc',  # U diaeresis
  u'Y' : u'\u00dd',  # Y acute
}

# Matches any single vowel that is a key of _VOWELS, or a capital P.
# list() keeps the concatenation valid when keys() returns a view object
# rather than a list.
_PSUB_RE = lazy_re.compile("(%s)" % '|'.join(list(_VOWELS.keys()) + ['P']))


# Pseudotranslations previously created.  This is important for performance
# reasons, especially since we routinely pseudotranslate the whole project
# several or many different times for each build.
# Maps the source string to the pseudotranslated string.
_existing_translations = {}


def MapVowels(str, also_p = False):
  '''Returns a copy of 'str' where characters that exist as keys in _VOWELS
  have been replaced with the corresponding value.  If also_p is true, this
  function will also change capital P characters into _QOF.

  Args:
    str: the text to transform.
    also_p: whether capital 'P' characters are replaced with _QOF; when
            false they are left unchanged.

  Returns:
    The transformed text.
  '''
  def Repl(match):
    char = match.group()
    if char == 'P':
      # 'P' is matched by _PSUB_RE but is not a key of _VOWELS, so it must be
      # handled here rather than looked up in the table.
      return _QOF if also_p else 'P'
    return _VOWELS[char]
  return _PSUB_RE.sub(Repl, str)


def PseudoString(str):
  '''Returns a pseudotranslation of the provided string, in our enhanced
  P-language.

  Each run of consecutive vowels is mapped through MapVowels() and emitted
  twice with _QOF in between; all other characters are copied unchanged.
  Results are memoized in _existing_translations.

  Args:
    str: the text to pseudotranslate.

  Returns:
    The pseudotranslated text.
  '''
  if str in _existing_translations:
    return _existing_translations[str]

  pieces = []
  length = len(str)
  ix = 0
  while ix < length:
    if str[ix] not in _VOWELS:
      pieces.append(str[ix])
      ix += 1
    else:
      # We want to treat consecutive vowels as one composite vowel.  This is
      # not always accurate e.g. in composite words but good enough.
      start = ix
      while ix < length and str[ix] in _VOWELS:
        ix += 1
      # Prefixing with u'' hands MapVowels a unicode string, as before.
      changed_vowels = MapVowels(u'' + str[start:ix])
      pieces.append(changed_vowels)
      pieces.append(_QOF)
      pieces.append(changed_vowels)

  # Join once instead of growing the result with repeated concatenation.
  outstr = u''.join(pieces)

  _existing_translations[str] = outstr
  return outstr


def PseudoMessage(message):
  '''Returns a pseudotranslation of the provided message.

  Placeholders are copied into the translation unchanged; every other part of
  the message content is run through PseudoString().

  Args:
    message: tclib.Message()

  Returns:
    tclib.Translation()
  '''
  transl = tclib.Translation()

  for part in message.GetContent():
    if isinstance(part, tclib.Placeholder):
      transl.AppendPlaceholder(part)
    else:
      transl.AppendText(PseudoString(part))

  return transl