1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "compile/PseudolocaleGenerator.h"
18
19 #include <algorithm>
20
21 #include "ResourceTable.h"
22 #include "ResourceValues.h"
23 #include "ValueVisitor.h"
24 #include "compile/Pseudolocalizer.h"
25 #include "util/Util.h"
26
27 using ::android::ConfigDescription;
28 using ::android::StringPiece;
29 using ::android::StringPiece16;
30
31 namespace aapt {
32
33 // The struct that represents both Span objects and UntranslatableSections.
34 struct UnifiedSpan {
35 // Only present for Span objects. If not present, this was an UntranslatableSection.
36 Maybe<std::string> tag;
37
38 // The UTF-16 index into the string where this span starts.
39 uint32_t first_char;
40
41 // The UTF-16 index into the string where this span ends, inclusive.
42 uint32_t last_char;
43 };
44
operator <(const UnifiedSpan & left,const UnifiedSpan & right)45 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
46 if (left.first_char < right.first_char) {
47 return true;
48 } else if (left.first_char > right.first_char) {
49 return false;
50 } else if (left.last_char < right.last_char) {
51 return true;
52 }
53 return false;
54 }
55
SpanToUnifiedSpan(const StringPool::Span & span)56 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
57 return UnifiedSpan{*span.name, span.first_char, span.last_char};
58 }
59
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)60 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
61 return UnifiedSpan{
62 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
63 }
64
65 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
66 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)67 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
68 // Ensure the Spans are sorted and converted.
69 std::vector<UnifiedSpan> sorted_spans;
70 sorted_spans.reserve(string.value->spans.size());
71 std::transform(string.value->spans.begin(), string.value->spans.end(),
72 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
73
74 // Stable sort to ensure tag sequences like "<b><i>" are preserved.
75 std::stable_sort(sorted_spans.begin(), sorted_spans.end());
76
77 // Ensure the UntranslatableSections are sorted and converted.
78 std::vector<UnifiedSpan> sorted_untranslatable_sections;
79 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
80 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
81 std::back_inserter(sorted_untranslatable_sections),
82 UntranslatableSectionToUnifiedSpan);
83 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
84
85 std::vector<UnifiedSpan> merged_spans;
86 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
87 auto span_iter = sorted_spans.begin();
88 auto untranslatable_iter = sorted_untranslatable_sections.begin();
89 while (span_iter != sorted_spans.end() &&
90 untranslatable_iter != sorted_untranslatable_sections.end()) {
91 if (*span_iter < *untranslatable_iter) {
92 merged_spans.push_back(std::move(*span_iter));
93 ++span_iter;
94 } else {
95 merged_spans.push_back(std::move(*untranslatable_iter));
96 ++untranslatable_iter;
97 }
98 }
99
100 while (span_iter != sorted_spans.end()) {
101 merged_spans.push_back(std::move(*span_iter));
102 ++span_iter;
103 }
104
105 while (untranslatable_iter != sorted_untranslatable_sections.end()) {
106 merged_spans.push_back(std::move(*untranslatable_iter));
107 ++untranslatable_iter;
108 }
109 return merged_spans;
110 }
111
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,StringPool * pool)112 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
113 Pseudolocalizer::Method method,
114 StringPool* pool) {
115 Pseudolocalizer localizer(method);
116
117 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
118 // This will effectively subdivide the string into multiple sections that can be individually
119 // pseudolocalized, while keeping the span indices synchronized.
120 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
121
122 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
123 // runtime. So we will do all our processing in UTF-16, then convert back.
124 const std::u16string text16 = util::Utf8ToUtf16(string->value->value);
125
126 // Convenient wrapper around the text that allows us to work with StringPieces.
127 const StringPiece16 text(text16);
128
129 // The new string.
130 std::string new_string = localizer.Start();
131
132 // The stack that keeps track of what nested Span we're in.
133 std::vector<size_t> span_stack;
134
135 // The current position in the original text.
136 uint32_t cursor = 0u;
137
138 // The current position in the new text.
139 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
140 new_string.size(), false);
141
142 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
143 bool translatable = true;
144 size_t span_idx = 0u;
145 while (span_idx < merged_spans.size() || !span_stack.empty()) {
146 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
147 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
148
149 if (span != nullptr) {
150 if (parent_span == nullptr || parent_span->last_char > span->first_char) {
151 // There is no parent, or this span is the child of the parent.
152 // Pseudolocalize all the text until this span.
153 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
154 cursor += substr.size();
155
156 // Pseudolocalize the substring.
157 std::string new_substr = util::Utf16ToUtf8(substr);
158 if (translatable) {
159 new_substr = localizer.Text(new_substr);
160 }
161 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
162 new_substr.size(), false);
163 new_string += new_substr;
164
165 // Rewrite the first_char.
166 span->first_char = new_cursor;
167 if (!span->tag) {
168 // An untranslatable section has begun!
169 translatable = false;
170 }
171 span_stack.push_back(span_idx);
172 ++span_idx;
173 continue;
174 }
175 }
176
177 if (parent_span != nullptr) {
178 // There is a parent, and either this span is not a child of it, or there are no more spans.
179 // Pop this off the stack.
180 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
181 cursor += substr.size();
182
183 // Pseudolocalize the substring.
184 std::string new_substr = util::Utf16ToUtf8(substr);
185 if (translatable) {
186 new_substr = localizer.Text(new_substr);
187 }
188 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
189 new_substr.size(), false);
190 new_string += new_substr;
191
192 parent_span->last_char = new_cursor - 1;
193 if (parent_span->tag) {
194 // An end to an untranslatable section.
195 translatable = true;
196 }
197 span_stack.pop_back();
198 }
199 }
200
201 // Finish the pseudolocalization at the end of the string.
202 new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
203 new_string += localizer.End();
204
205 StyleString localized;
206 localized.str = std::move(new_string);
207
208 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
209 for (UnifiedSpan& span : merged_spans) {
210 if (span.tag) {
211 localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
212 }
213 }
214 return util::make_unique<StyledString>(pool->MakeRef(localized));
215 }
216
217 namespace {
218
219 class Visitor : public ValueVisitor {
220 public:
221 // Either value or item will be populated upon visiting the value.
222 std::unique_ptr<Value> value;
223 std::unique_ptr<Item> item;
224
Visitor(StringPool * pool,Pseudolocalizer::Method method)225 Visitor(StringPool* pool, Pseudolocalizer::Method method)
226 : pool_(pool), method_(method), localizer_(method) {}
227
Visit(Plural * plural)228 void Visit(Plural* plural) override {
229 CloningValueTransformer cloner(pool_);
230 std::unique_ptr<Plural> localized = util::make_unique<Plural>();
231 for (size_t i = 0; i < plural->values.size(); i++) {
232 Visitor sub_visitor(pool_, method_);
233 if (plural->values[i]) {
234 plural->values[i]->Accept(&sub_visitor);
235 if (sub_visitor.item) {
236 localized->values[i] = std::move(sub_visitor.item);
237 } else {
238 localized->values[i] = plural->values[i]->Transform(cloner);
239 }
240 }
241 }
242 localized->SetSource(plural->GetSource());
243 localized->SetWeak(true);
244 value = std::move(localized);
245 }
246
Visit(String * string)247 void Visit(String* string) override {
248 const StringPiece original_string = *string->value;
249 std::string result = localizer_.Start();
250
251 // Pseudolocalize only the translatable sections.
252 size_t start = 0u;
253 for (const UntranslatableSection& section : string->untranslatable_sections) {
254 // Pseudolocalize the content before the untranslatable section.
255 const size_t len = section.start - start;
256 if (len > 0u) {
257 result += localizer_.Text(original_string.substr(start, len));
258 }
259
260 // Copy the untranslatable content.
261 result += original_string.substr(section.start, section.end - section.start);
262 start = section.end;
263 }
264
265 // Pseudolocalize the content after the last untranslatable section.
266 if (start != original_string.size()) {
267 const size_t len = original_string.size() - start;
268 result += localizer_.Text(original_string.substr(start, len));
269 }
270 result += localizer_.End();
271
272 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
273 localized->SetSource(string->GetSource());
274 localized->SetWeak(true);
275 item = std::move(localized);
276 }
277
Visit(StyledString * string)278 void Visit(StyledString* string) override {
279 item = PseudolocalizeStyledString(string, method_, pool_);
280 item->SetSource(string->GetSource());
281 item->SetWeak(true);
282 }
283
284 private:
285 DISALLOW_COPY_AND_ASSIGN(Visitor);
286
287 StringPool* pool_;
288 Pseudolocalizer::Method method_;
289 Pseudolocalizer localizer_;
290 };
291
// Returns a copy of `base` whose language/country are overwritten with the pseudolocale that
// corresponds to `m`: "en"/"XA" for kAccent and "ar"/"XB" for kBidi. Any other method leaves the
// copy unchanged.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  ConfigDescription modified = base;
  if (m == Pseudolocalizer::Method::kAccent) {
    modified.language[0] = 'e';
    modified.language[1] = 'n';
    modified.country[0] = 'X';
    modified.country[1] = 'A';
  } else if (m == Pseudolocalizer::Method::kBidi) {
    modified.language[0] = 'a';
    modified.language[1] = 'r';
    modified.country[0] = 'X';
    modified.country[1] = 'B';
  }
  return modified;
}
314
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,StringPool * pool,ResourceEntry * entry)315 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
316 ResourceConfigValue* original_value,
317 StringPool* pool, ResourceEntry* entry) {
318 Visitor visitor(pool, method);
319 original_value->value->Accept(&visitor);
320
321 std::unique_ptr<Value> localized_value;
322 if (visitor.value) {
323 localized_value = std::move(visitor.value);
324 } else if (visitor.item) {
325 localized_value = std::move(visitor.item);
326 }
327
328 if (!localized_value) {
329 return;
330 }
331
332 ConfigDescription config_with_accent =
333 ModifyConfigForPseudoLocale(original_value->config, method);
334
335 ResourceConfigValue* new_config_value =
336 entry->FindOrCreateValue(config_with_accent, original_value->product);
337 if (!new_config_value->value) {
338 // Only use auto-generated pseudo-localization if none is defined.
339 new_config_value->value = std::move(localized_value);
340 }
341 }
342
343 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
344 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)345 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
346 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
347 if (diff & ConfigDescription::CONFIG_LOCALE) {
348 return false;
349 }
350 return config_value->value->IsTranslatable();
351 }
352
353 } // namespace
354
Consume(IAaptContext * context,ResourceTable * table)355 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
356 for (auto& package : table->packages) {
357 for (auto& type : package->types) {
358 for (auto& entry : type->entries) {
359 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
360 for (ResourceConfigValue* value : values) {
361 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
362 entry.get());
363 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
364 entry.get());
365 }
366 }
367 }
368 }
369 return true;
370 }
371
372 } // namespace aapt
373