1 #include "pseudolocalize.h"
2
3 using namespace std;
4
5 // String basis to generate expansion
6 static const String16 k_expansion_string = String16("one two three "
7 "four five six seven eight nine ten eleven twelve thirteen "
8 "fourteen fiveteen sixteen seventeen nineteen twenty");
9
10 // Special unicode characters to override directionality of the words
11 static const String16 k_rlm = String16("\xe2\x80\x8f");
12 static const String16 k_rlo = String16("\xE2\x80\xae");
13 static const String16 k_pdf = String16("\xE2\x80\xac");
14
15 // Placeholder marks
16 static const String16 k_placeholder_open = String16("\xc2\xbb");
17 static const String16 k_placeholder_close = String16("\xc2\xab");
18
/**
 * Maps a single character to its pseudolocalized replacement.
 *
 * @param c the UTF-16 code unit to translate
 * @return a NUL-terminated UTF-8 string holding the replacement, or a null
 *         pointer when the character has no replacement ('F', digits,
 *         whitespace, and everything other than the ASCII letters, '!', '?'
 *         and '$' listed below)
 */
static const char*
pseudolocalize_char(const char16_t c)
{
    // Replacements for 'a'..'z', indexed by the distance from 'a'.
    static const char* const k_lower[26] = {
        "\xc3\xa5",     // a
        "\xc9\x93",     // b
        "\xc3\xa7",     // c
        "\xc3\xb0",     // d
        "\xc3\xa9",     // e
        "\xc6\x92",     // f
        "\xc4\x9d",     // g
        "\xc4\xa5",     // h
        "\xc3\xae",     // i
        "\xc4\xb5",     // j
        "\xc4\xb7",     // k
        "\xc4\xbc",     // l
        "\xe1\xb8\xbf", // m
        "\xc3\xb1",     // n
        "\xc3\xb6",     // o
        "\xc3\xbe",     // p
        "\x51",         // q
        "\xc5\x95",     // r
        "\xc5\xa1",     // s
        "\xc5\xa3",     // t
        "\xc3\xbb",     // u
        "\x56",         // v
        "\xc5\xb5",     // w
        "\xd1\x85",     // x
        "\xc3\xbd",     // y
        "\xc5\xbe",     // z
    };

    // Replacements for 'A'..'Z', indexed by the distance from 'A'.
    // There is no entry for 'F', so it is reported as "no replacement".
    static const char* const k_upper[26] = {
        "\xc3\x85",     // A
        "\xce\xb2",     // B
        "\xc3\x87",     // C
        "\xc3\x90",     // D
        "\xc3\x89",     // E
        nullptr,        // F
        "\xc4\x9c",     // G
        "\xc4\xa4",     // H
        "\xc3\x8e",     // I
        "\xc4\xb4",     // J
        "\xc4\xb6",     // K
        "\xc4\xbb",     // L
        "\xe1\xb8\xbe", // M
        "\xc3\x91",     // N
        "\xc3\x96",     // O
        "\xc3\x9e",     // P
        "\x71",         // Q
        "\xc5\x94",     // R
        "\xc5\xa0",     // S
        "\xc5\xa2",     // T
        "\xc3\x9b",     // U
        "\xce\xbd",     // V
        "\xc5\xb4",     // W
        "\xc3\x97",     // X
        "\xc3\x9d",     // Y
        "\xc5\xbd",     // Z
    };

    if (c >= 'a' && c <= 'z') {
        return k_lower[c - 'a'];
    }
    if (c >= 'A' && c <= 'Z') {
        return k_upper[c - 'A'];
    }

    // The only non-letters that get a replacement.
    switch (c) {
        case '!':   return "\xc2\xa1";
        case '?':   return "\xc2\xbf";
        case '$':   return "\xe2\x82\xac";
        default:    return nullptr;
    }
}
80
/**
 * Tells whether a character can terminate a regular '%' placeholder.
 *
 * @param c the UTF-16 code unit to test
 * @return true for any of s S c C d o x X f e E g G a A b B h H % n,
 *         false for every other value (including 't' and NUL)
 */
static bool
is_possible_normal_placeholder_end(const char16_t c) {
    // Every accepted terminator; the trailing NUL of the literal is excluded
    // from the scan below so that a NUL input is not reported as a match.
    static const char k_terminators[] = "sScCdoxXfeEgGaAbBhH%n";
    const size_t count = sizeof(k_terminators) - 1;

    for (size_t idx = 0; idx < count; ++idx) {
        if (c == k_terminators[idx]) {
            return true;
        }
    }
    return false;
}
108
109 String16
pseudo_generate_expansion(const unsigned int length)110 pseudo_generate_expansion(const unsigned int length) {
111 String16 result = k_expansion_string;
112 const char16_t* s = result.string();
113 if (result.size() < length) {
114 result += String16(" ");
115 result += pseudo_generate_expansion(length - result.size());
116 } else {
117 int ext = 0;
118 // Should contain only whole words, so looking for a space
119 for (unsigned int i = length + 1; i < result.size(); ++i) {
120 ++ext;
121 if (s[i] == ' ') {
122 break;
123 }
124 }
125 result.remove(length + ext, 0);
126 }
127 return result;
128 }
129
130 /**
131 * Converts characters so they look like they've been localized.
132 *
133 * Note: This leaves escape sequences untouched so they can later be
134 * processed by ResTable::collectString in the normal way.
135 */
136 String16
pseudolocalize_string(const String16 & source)137 pseudolocalize_string(const String16& source)
138 {
139 const char16_t* s = source.string();
140 String16 result;
141 const size_t I = source.size();
142 for (size_t i=0; i<I; i++) {
143 char16_t c = s[i];
144 if (c == '\\') {
145 // Escape syntax, no need to pseudolocalize
146 if (i<I-1) {
147 result += String16("\\");
148 i++;
149 c = s[i];
150 switch (c) {
151 case 'u':
152 // this one takes up 5 chars
153 result += String16(s+i, 5);
154 i += 4;
155 break;
156 case 't':
157 case 'n':
158 case '#':
159 case '@':
160 case '?':
161 case '"':
162 case '\'':
163 case '\\':
164 default:
165 result.append(&c, 1);
166 break;
167 }
168 } else {
169 result.append(&c, 1);
170 }
171 } else if (c == '%') {
172 // Placeholder syntax, no need to pseudolocalize
173 result += k_placeholder_open;
174 bool end = false;
175 result.append(&c, 1);
176 while (!end && i < I) {
177 ++i;
178 c = s[i];
179 result.append(&c, 1);
180 if (is_possible_normal_placeholder_end(c)) {
181 end = true;
182 } else if (c == 't') {
183 ++i;
184 c = s[i];
185 result.append(&c, 1);
186 end = true;
187 }
188 }
189 result += k_placeholder_close;
190 } else if (c == '<' || c == '&') {
191 // html syntax, no need to pseudolocalize
192 bool tag_closed = false;
193 while (!tag_closed && i < I) {
194 if (c == '&') {
195 String16 escape_text;
196 escape_text.append(&c, 1);
197 bool end = false;
198 size_t htmlCodePos = i;
199 while (!end && htmlCodePos < I) {
200 ++htmlCodePos;
201 c = s[htmlCodePos];
202 escape_text.append(&c, 1);
203 // Valid html code
204 if (c == ';') {
205 end = true;
206 i = htmlCodePos;
207 }
208 // Wrong html code
209 else if (!((c == '#' ||
210 (c >= 'a' && c <= 'z') ||
211 (c >= 'A' && c <= 'Z') ||
212 (c >= '0' && c <= '9')))) {
213 end = true;
214 }
215 }
216 result += escape_text;
217 if (escape_text != String16("<")) {
218 tag_closed = true;
219 }
220 continue;
221 }
222 if (c == '>') {
223 tag_closed = true;
224 result.append(&c, 1);
225 continue;
226 }
227 result.append(&c, 1);
228 i++;
229 c = s[i];
230 }
231 } else {
232 // This is a pure text that should be pseudolocalized
233 const char* p = pseudolocalize_char(c);
234 if (p != NULL) {
235 result += String16(p);
236 } else {
237 result.append(&c, 1);
238 }
239 }
240 }
241 return result;
242 }
243
244 String16
pseudobidi_string(const String16 & source)245 pseudobidi_string(const String16& source)
246 {
247 const char16_t* s = source.string();
248 String16 result;
249 result += k_rlm;
250 result += k_rlo;
251 for (size_t i=0; i<source.size(); i++) {
252 char16_t c = s[i];
253 switch(c) {
254 case ' ': result += k_pdf;
255 result += k_rlm;
256 result.append(&c, 1);
257 result += k_rlm;
258 result += k_rlo;
259 break;
260 default: result.append(&c, 1);
261 break;
262 }
263 }
264 result += k_pdf;
265 result += k_rlm;
266 return result;
267 }
268
269