1 /*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "compile/PseudolocaleGenerator.h"
18
19 #include <algorithm>
20
21 #include "ResourceTable.h"
22 #include "ResourceValues.h"
23 #include "ValueVisitor.h"
24 #include "compile/Pseudolocalizer.h"
25 #include "util/Util.h"
26
27 using android::StringPiece;
28 using android::StringPiece16;
29
30 namespace aapt {
31
32 // The struct that represents both Span objects and UntranslatableSections.
33 struct UnifiedSpan {
34 // Only present for Span objects. If not present, this was an UntranslatableSection.
35 Maybe<std::string> tag;
36
37 // The UTF-16 index into the string where this span starts.
38 uint32_t first_char;
39
40 // The UTF-16 index into the string where this span ends, inclusive.
41 uint32_t last_char;
42 };
43
operator <(const UnifiedSpan & left,const UnifiedSpan & right)44 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
45 if (left.first_char < right.first_char) {
46 return true;
47 } else if (left.first_char > right.first_char) {
48 return false;
49 } else if (left.last_char < right.last_char) {
50 return true;
51 }
52 return false;
53 }
54
SpanToUnifiedSpan(const StringPool::Span & span)55 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
56 return UnifiedSpan{*span.name, span.first_char, span.last_char};
57 }
58
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)59 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
60 return UnifiedSpan{
61 {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
62 }
63
64 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
65 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)66 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
67 // Ensure the Spans are sorted and converted.
68 std::vector<UnifiedSpan> sorted_spans;
69 sorted_spans.reserve(string.value->spans.size());
70 std::transform(string.value->spans.begin(), string.value->spans.end(),
71 std::back_inserter(sorted_spans), SpanToUnifiedSpan);
72
73 // Stable sort to ensure tag sequences like "<b><i>" are preserved.
74 std::stable_sort(sorted_spans.begin(), sorted_spans.end());
75
76 // Ensure the UntranslatableSections are sorted and converted.
77 std::vector<UnifiedSpan> sorted_untranslatable_sections;
78 sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
79 std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
80 std::back_inserter(sorted_untranslatable_sections),
81 UntranslatableSectionToUnifiedSpan);
82 std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
83
84 std::vector<UnifiedSpan> merged_spans;
85 merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
86 auto span_iter = sorted_spans.begin();
87 auto untranslatable_iter = sorted_untranslatable_sections.begin();
88 while (span_iter != sorted_spans.end() &&
89 untranslatable_iter != sorted_untranslatable_sections.end()) {
90 if (*span_iter < *untranslatable_iter) {
91 merged_spans.push_back(std::move(*span_iter));
92 ++span_iter;
93 } else {
94 merged_spans.push_back(std::move(*untranslatable_iter));
95 ++untranslatable_iter;
96 }
97 }
98
99 while (span_iter != sorted_spans.end()) {
100 merged_spans.push_back(std::move(*span_iter));
101 ++span_iter;
102 }
103
104 while (untranslatable_iter != sorted_untranslatable_sections.end()) {
105 merged_spans.push_back(std::move(*untranslatable_iter));
106 ++untranslatable_iter;
107 }
108 return merged_spans;
109 }
110
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,StringPool * pool)111 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
112 Pseudolocalizer::Method method,
113 StringPool* pool) {
114 Pseudolocalizer localizer(method);
115
116 // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
117 // This will effectively subdivide the string into multiple sections that can be individually
118 // pseudolocalized, while keeping the span indices synchronized.
119 std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
120
121 // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
122 // runtime. So we will do all our processing in UTF-16, then convert back.
123 const std::u16string text16 = util::Utf8ToUtf16(*string->value->str);
124
125 // Convenient wrapper around the text that allows us to work with StringPieces.
126 const StringPiece16 text(text16);
127
128 // The new string.
129 std::string new_string = localizer.Start();
130
131 // The stack that keeps track of what nested Span we're in.
132 std::vector<size_t> span_stack;
133
134 // The current position in the original text.
135 uint32_t cursor = 0u;
136
137 // The current position in the new text.
138 uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
139 new_string.size(), false);
140
141 // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
142 bool translatable = true;
143 size_t span_idx = 0u;
144 while (span_idx < merged_spans.size() || !span_stack.empty()) {
145 UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
146 UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
147
148 if (span != nullptr) {
149 if (parent_span == nullptr || parent_span->last_char > span->first_char) {
150 // There is no parent, or this span is the child of the parent.
151 // Pseudolocalize all the text until this span.
152 const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
153 cursor += substr.size();
154
155 // Pseudolocalize the substring.
156 std::string new_substr = util::Utf16ToUtf8(substr);
157 if (translatable) {
158 new_substr = localizer.Text(new_substr);
159 }
160 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
161 new_substr.size(), false);
162 new_string += new_substr;
163
164 // Rewrite the first_char.
165 span->first_char = new_cursor;
166 if (!span->tag) {
167 // An untranslatable section has begun!
168 translatable = false;
169 }
170 span_stack.push_back(span_idx);
171 ++span_idx;
172 continue;
173 }
174 }
175
176 if (parent_span != nullptr) {
177 // There is a parent, and either this span is not a child of it, or there are no more spans.
178 // Pop this off the stack.
179 const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
180 cursor += substr.size();
181
182 // Pseudolocalize the substring.
183 std::string new_substr = util::Utf16ToUtf8(substr);
184 if (translatable) {
185 new_substr = localizer.Text(new_substr);
186 }
187 new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
188 new_substr.size(), false);
189 new_string += new_substr;
190
191 parent_span->last_char = new_cursor - 1;
192 if (parent_span->tag) {
193 // An end to an untranslatable section.
194 translatable = true;
195 }
196 span_stack.pop_back();
197 }
198 }
199
200 // Finish the pseudolocalization at the end of the string.
201 new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
202 new_string += localizer.End();
203
204 StyleString localized;
205 localized.str = std::move(new_string);
206
207 // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
208 for (UnifiedSpan& span : merged_spans) {
209 if (span.tag) {
210 localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
211 }
212 }
213 return util::make_unique<StyledString>(pool->MakeRef(localized));
214 }
215
216 namespace {
217
218 class Visitor : public RawValueVisitor {
219 public:
220 // Either value or item will be populated upon visiting the value.
221 std::unique_ptr<Value> value;
222 std::unique_ptr<Item> item;
223
Visitor(StringPool * pool,Pseudolocalizer::Method method)224 Visitor(StringPool* pool, Pseudolocalizer::Method method)
225 : pool_(pool), method_(method), localizer_(method) {}
226
Visit(Plural * plural)227 void Visit(Plural* plural) override {
228 std::unique_ptr<Plural> localized = util::make_unique<Plural>();
229 for (size_t i = 0; i < plural->values.size(); i++) {
230 Visitor sub_visitor(pool_, method_);
231 if (plural->values[i]) {
232 plural->values[i]->Accept(&sub_visitor);
233 if (sub_visitor.value) {
234 localized->values[i] = std::move(sub_visitor.item);
235 } else {
236 localized->values[i] = std::unique_ptr<Item>(plural->values[i]->Clone(pool_));
237 }
238 }
239 }
240 localized->SetSource(plural->GetSource());
241 localized->SetWeak(true);
242 value = std::move(localized);
243 }
244
Visit(String * string)245 void Visit(String* string) override {
246 const StringPiece original_string = *string->value;
247 std::string result = localizer_.Start();
248
249 // Pseudolocalize only the translatable sections.
250 size_t start = 0u;
251 for (const UntranslatableSection& section : string->untranslatable_sections) {
252 // Pseudolocalize the content before the untranslatable section.
253 const size_t len = section.start - start;
254 if (len > 0u) {
255 result += localizer_.Text(original_string.substr(start, len));
256 }
257
258 // Copy the untranslatable content.
259 result += original_string.substr(section.start, section.end - section.start);
260 start = section.end;
261 }
262
263 // Pseudolocalize the content after the last untranslatable section.
264 if (start != original_string.size()) {
265 const size_t len = original_string.size() - start;
266 result += localizer_.Text(original_string.substr(start, len));
267 }
268 result += localizer_.End();
269
270 std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
271 localized->SetSource(string->GetSource());
272 localized->SetWeak(true);
273 item = std::move(localized);
274 }
275
Visit(StyledString * string)276 void Visit(StyledString* string) override {
277 item = PseudolocalizeStyledString(string, method_, pool_);
278 item->SetSource(string->GetSource());
279 item->SetWeak(true);
280 }
281
282 private:
283 DISALLOW_COPY_AND_ASSIGN(Visitor);
284
285 StringPool* pool_;
286 Pseudolocalizer::Method method_;
287 Pseudolocalizer localizer_;
288 };
289
// Returns a copy of `base` whose locale is replaced by the pseudolocale that corresponds to
// method `m`: "en-XA" for kAccent and "ar-XB" for kBidi. For any other method the copy is
// returned unmodified.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  ConfigDescription modified = base;
  if (m == Pseudolocalizer::Method::kAccent) {
    modified.language[0] = 'e';
    modified.language[1] = 'n';
    modified.country[0] = 'X';
    modified.country[1] = 'A';
  } else if (m == Pseudolocalizer::Method::kBidi) {
    modified.language[0] = 'a';
    modified.language[1] = 'r';
    modified.country[0] = 'X';
    modified.country[1] = 'B';
  }
  return modified;
}
312
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,StringPool * pool,ResourceEntry * entry)313 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
314 ResourceConfigValue* original_value,
315 StringPool* pool, ResourceEntry* entry) {
316 Visitor visitor(pool, method);
317 original_value->value->Accept(&visitor);
318
319 std::unique_ptr<Value> localized_value;
320 if (visitor.value) {
321 localized_value = std::move(visitor.value);
322 } else if (visitor.item) {
323 localized_value = std::move(visitor.item);
324 }
325
326 if (!localized_value) {
327 return;
328 }
329
330 ConfigDescription config_with_accent =
331 ModifyConfigForPseudoLocale(original_value->config, method);
332
333 ResourceConfigValue* new_config_value =
334 entry->FindOrCreateValue(config_with_accent, original_value->product);
335 if (!new_config_value->value) {
336 // Only use auto-generated pseudo-localization if none is defined.
337 new_config_value->value = std::move(localized_value);
338 }
339 }
340
341 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
342 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)343 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
344 const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
345 if (diff & ConfigDescription::CONFIG_LOCALE) {
346 return false;
347 }
348 return config_value->value->IsTranslatable();
349 }
350
351 } // namespace
352
Consume(IAaptContext * context,ResourceTable * table)353 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
354 for (auto& package : table->packages) {
355 for (auto& type : package->types) {
356 for (auto& entry : type->entries) {
357 std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
358 for (ResourceConfigValue* value : values) {
359 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
360 entry.get());
361 PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
362 entry.get());
363 }
364 }
365 }
366 }
367 return true;
368 }
369
370 } // namespace aapt
371