• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "compile/PseudolocaleGenerator.h"
18 
19 #include <algorithm>
20 
21 #include "ResourceTable.h"
22 #include "ResourceValues.h"
23 #include "ValueVisitor.h"
24 #include "compile/Pseudolocalizer.h"
25 #include "util/Util.h"
26 
27 using ::android::ConfigDescription;
28 using ::android::StringPiece;
29 using ::android::StringPiece16;
30 
31 namespace aapt {
32 
33 // The struct that represents both Span objects and UntranslatableSections.
34 struct UnifiedSpan {
35   // Only present for Span objects. If not present, this was an UntranslatableSection.
36   Maybe<std::string> tag;
37 
38   // The UTF-16 index into the string where this span starts.
39   uint32_t first_char;
40 
41   // The UTF-16 index into the string where this span ends, inclusive.
42   uint32_t last_char;
43 };
44 
operator <(const UnifiedSpan & left,const UnifiedSpan & right)45 inline static bool operator<(const UnifiedSpan& left, const UnifiedSpan& right) {
46   if (left.first_char < right.first_char) {
47     return true;
48   } else if (left.first_char > right.first_char) {
49     return false;
50   } else if (left.last_char < right.last_char) {
51     return true;
52   }
53   return false;
54 }
55 
SpanToUnifiedSpan(const StringPool::Span & span)56 inline static UnifiedSpan SpanToUnifiedSpan(const StringPool::Span& span) {
57   return UnifiedSpan{*span.name, span.first_char, span.last_char};
58 }
59 
UntranslatableSectionToUnifiedSpan(const UntranslatableSection & section)60 inline static UnifiedSpan UntranslatableSectionToUnifiedSpan(const UntranslatableSection& section) {
61   return UnifiedSpan{
62       {}, static_cast<uint32_t>(section.start), static_cast<uint32_t>(section.end) - 1};
63 }
64 
65 // Merges the Span and UntranslatableSections of this StyledString into a single vector of
66 // UnifiedSpans. This will first check that the Spans are sorted in ascending order.
MergeSpans(const StyledString & string)67 static std::vector<UnifiedSpan> MergeSpans(const StyledString& string) {
68   // Ensure the Spans are sorted and converted.
69   std::vector<UnifiedSpan> sorted_spans;
70   sorted_spans.reserve(string.value->spans.size());
71   std::transform(string.value->spans.begin(), string.value->spans.end(),
72                  std::back_inserter(sorted_spans), SpanToUnifiedSpan);
73 
74   // Stable sort to ensure tag sequences like "<b><i>" are preserved.
75   std::stable_sort(sorted_spans.begin(), sorted_spans.end());
76 
77   // Ensure the UntranslatableSections are sorted and converted.
78   std::vector<UnifiedSpan> sorted_untranslatable_sections;
79   sorted_untranslatable_sections.reserve(string.untranslatable_sections.size());
80   std::transform(string.untranslatable_sections.begin(), string.untranslatable_sections.end(),
81                  std::back_inserter(sorted_untranslatable_sections),
82                  UntranslatableSectionToUnifiedSpan);
83   std::sort(sorted_untranslatable_sections.begin(), sorted_untranslatable_sections.end());
84 
85   std::vector<UnifiedSpan> merged_spans;
86   merged_spans.reserve(sorted_spans.size() + sorted_untranslatable_sections.size());
87   auto span_iter = sorted_spans.begin();
88   auto untranslatable_iter = sorted_untranslatable_sections.begin();
89   while (span_iter != sorted_spans.end() &&
90          untranslatable_iter != sorted_untranslatable_sections.end()) {
91     if (*span_iter < *untranslatable_iter) {
92       merged_spans.push_back(std::move(*span_iter));
93       ++span_iter;
94     } else {
95       merged_spans.push_back(std::move(*untranslatable_iter));
96       ++untranslatable_iter;
97     }
98   }
99 
100   while (span_iter != sorted_spans.end()) {
101     merged_spans.push_back(std::move(*span_iter));
102     ++span_iter;
103   }
104 
105   while (untranslatable_iter != sorted_untranslatable_sections.end()) {
106     merged_spans.push_back(std::move(*untranslatable_iter));
107     ++untranslatable_iter;
108   }
109   return merged_spans;
110 }
111 
PseudolocalizeStyledString(StyledString * string,Pseudolocalizer::Method method,StringPool * pool)112 std::unique_ptr<StyledString> PseudolocalizeStyledString(StyledString* string,
113                                                          Pseudolocalizer::Method method,
114                                                          StringPool* pool) {
115   Pseudolocalizer localizer(method);
116 
117   // Collect the spans and untranslatable sections into one set of spans, sorted by first_char.
118   // This will effectively subdivide the string into multiple sections that can be individually
119   // pseudolocalized, while keeping the span indices synchronized.
120   std::vector<UnifiedSpan> merged_spans = MergeSpans(*string);
121 
122   // All Span indices are UTF-16 based, according to the resources.arsc format expected by the
123   // runtime. So we will do all our processing in UTF-16, then convert back.
124   const std::u16string text16 = util::Utf8ToUtf16(string->value->value);
125 
126   // Convenient wrapper around the text that allows us to work with StringPieces.
127   const StringPiece16 text(text16);
128 
129   // The new string.
130   std::string new_string = localizer.Start();
131 
132   // The stack that keeps track of what nested Span we're in.
133   std::vector<size_t> span_stack;
134 
135   // The current position in the original text.
136   uint32_t cursor = 0u;
137 
138   // The current position in the new text.
139   uint32_t new_cursor = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_string.data()),
140                                              new_string.size(), false);
141 
142   // We assume no nesting of untranslatable sections, since XLIFF doesn't allow it.
143   bool translatable = true;
144   size_t span_idx = 0u;
145   while (span_idx < merged_spans.size() || !span_stack.empty()) {
146     UnifiedSpan* span = span_idx >= merged_spans.size() ? nullptr : &merged_spans[span_idx];
147     UnifiedSpan* parent_span = span_stack.empty() ? nullptr : &merged_spans[span_stack.back()];
148 
149     if (span != nullptr) {
150       if (parent_span == nullptr || parent_span->last_char > span->first_char) {
151         // There is no parent, or this span is the child of the parent.
152         // Pseudolocalize all the text until this span.
153         const StringPiece16 substr = text.substr(cursor, span->first_char - cursor);
154         cursor += substr.size();
155 
156         // Pseudolocalize the substring.
157         std::string new_substr = util::Utf16ToUtf8(substr);
158         if (translatable) {
159           new_substr = localizer.Text(new_substr);
160         }
161         new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
162                                            new_substr.size(), false);
163         new_string += new_substr;
164 
165         // Rewrite the first_char.
166         span->first_char = new_cursor;
167         if (!span->tag) {
168           // An untranslatable section has begun!
169           translatable = false;
170         }
171         span_stack.push_back(span_idx);
172         ++span_idx;
173         continue;
174       }
175     }
176 
177     if (parent_span != nullptr) {
178       // There is a parent, and either this span is not a child of it, or there are no more spans.
179       // Pop this off the stack.
180       const StringPiece16 substr = text.substr(cursor, parent_span->last_char - cursor + 1);
181       cursor += substr.size();
182 
183       // Pseudolocalize the substring.
184       std::string new_substr = util::Utf16ToUtf8(substr);
185       if (translatable) {
186         new_substr = localizer.Text(new_substr);
187       }
188       new_cursor += utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(new_substr.data()),
189                                          new_substr.size(), false);
190       new_string += new_substr;
191 
192       parent_span->last_char = new_cursor - 1;
193       if (parent_span->tag) {
194         // An end to an untranslatable section.
195         translatable = true;
196       }
197       span_stack.pop_back();
198     }
199   }
200 
201   // Finish the pseudolocalization at the end of the string.
202   new_string += localizer.Text(util::Utf16ToUtf8(text.substr(cursor, text.size() - cursor)));
203   new_string += localizer.End();
204 
205   StyleString localized;
206   localized.str = std::move(new_string);
207 
208   // Convert the UnifiedSpans into regular Spans, skipping the UntranslatableSections.
209   for (UnifiedSpan& span : merged_spans) {
210     if (span.tag) {
211       localized.spans.push_back(Span{std::move(span.tag.value()), span.first_char, span.last_char});
212     }
213   }
214   return util::make_unique<StyledString>(pool->MakeRef(localized));
215 }
216 
217 namespace {
218 
219 class Visitor : public ValueVisitor {
220  public:
221   // Either value or item will be populated upon visiting the value.
222   std::unique_ptr<Value> value;
223   std::unique_ptr<Item> item;
224 
Visitor(StringPool * pool,Pseudolocalizer::Method method)225   Visitor(StringPool* pool, Pseudolocalizer::Method method)
226       : pool_(pool), method_(method), localizer_(method) {}
227 
Visit(Plural * plural)228   void Visit(Plural* plural) override {
229     CloningValueTransformer cloner(pool_);
230     std::unique_ptr<Plural> localized = util::make_unique<Plural>();
231     for (size_t i = 0; i < plural->values.size(); i++) {
232       Visitor sub_visitor(pool_, method_);
233       if (plural->values[i]) {
234         plural->values[i]->Accept(&sub_visitor);
235         if (sub_visitor.item) {
236           localized->values[i] = std::move(sub_visitor.item);
237         } else {
238           localized->values[i] = plural->values[i]->Transform(cloner);
239         }
240       }
241     }
242     localized->SetSource(plural->GetSource());
243     localized->SetWeak(true);
244     value = std::move(localized);
245   }
246 
Visit(String * string)247   void Visit(String* string) override {
248     const StringPiece original_string = *string->value;
249     std::string result = localizer_.Start();
250 
251     // Pseudolocalize only the translatable sections.
252     size_t start = 0u;
253     for (const UntranslatableSection& section : string->untranslatable_sections) {
254       // Pseudolocalize the content before the untranslatable section.
255       const size_t len = section.start - start;
256       if (len > 0u) {
257         result += localizer_.Text(original_string.substr(start, len));
258       }
259 
260       // Copy the untranslatable content.
261       result += original_string.substr(section.start, section.end - section.start);
262       start = section.end;
263     }
264 
265     // Pseudolocalize the content after the last untranslatable section.
266     if (start != original_string.size()) {
267       const size_t len = original_string.size() - start;
268       result += localizer_.Text(original_string.substr(start, len));
269     }
270     result += localizer_.End();
271 
272     std::unique_ptr<String> localized = util::make_unique<String>(pool_->MakeRef(result));
273     localized->SetSource(string->GetSource());
274     localized->SetWeak(true);
275     item = std::move(localized);
276   }
277 
Visit(StyledString * string)278   void Visit(StyledString* string) override {
279     item = PseudolocalizeStyledString(string, method_, pool_);
280     item->SetSource(string->GetSource());
281     item->SetWeak(true);
282   }
283 
284  private:
285   DISALLOW_COPY_AND_ASSIGN(Visitor);
286 
287   StringPool* pool_;
288   Pseudolocalizer::Method method_;
289   Pseudolocalizer localizer_;
290 };
291 
// Returns a copy of `base` whose language/country are replaced by the pseudolocale that belongs
// to method `m`: "en"/"XA" for kAccent and "ar"/"XB" for kBidi. For any other method the copy is
// returned unchanged.
ConfigDescription ModifyConfigForPseudoLocale(const ConfigDescription& base,
                                              Pseudolocalizer::Method m) {
  const char* language = nullptr;
  const char* country = nullptr;
  switch (m) {
    case Pseudolocalizer::Method::kAccent:
      language = "en";
      country = "XA";
      break;

    case Pseudolocalizer::Method::kBidi:
      language = "ar";
      country = "XB";
      break;

    default:
      break;
  }

  ConfigDescription modified = base;
  if (language != nullptr) {
    // Only the first two characters of each field are overwritten.
    modified.language[0] = language[0];
    modified.language[1] = language[1];
    modified.country[0] = country[0];
    modified.country[1] = country[1];
  }
  return modified;
}
314 
PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,ResourceConfigValue * original_value,StringPool * pool,ResourceEntry * entry)315 void PseudolocalizeIfNeeded(const Pseudolocalizer::Method method,
316                             ResourceConfigValue* original_value,
317                             StringPool* pool, ResourceEntry* entry) {
318   Visitor visitor(pool, method);
319   original_value->value->Accept(&visitor);
320 
321   std::unique_ptr<Value> localized_value;
322   if (visitor.value) {
323     localized_value = std::move(visitor.value);
324   } else if (visitor.item) {
325     localized_value = std::move(visitor.item);
326   }
327 
328   if (!localized_value) {
329     return;
330   }
331 
332   ConfigDescription config_with_accent =
333       ModifyConfigForPseudoLocale(original_value->config, method);
334 
335   ResourceConfigValue* new_config_value =
336       entry->FindOrCreateValue(config_with_accent, original_value->product);
337   if (!new_config_value->value) {
338     // Only use auto-generated pseudo-localization if none is defined.
339     new_config_value->value = std::move(localized_value);
340   }
341 }
342 
343 // A value is pseudolocalizable if it does not define a locale (or is the default locale) and is
344 // translatable.
IsPseudolocalizable(ResourceConfigValue * config_value)345 static bool IsPseudolocalizable(ResourceConfigValue* config_value) {
346   const int diff = config_value->config.diff(ConfigDescription::DefaultConfig());
347   if (diff & ConfigDescription::CONFIG_LOCALE) {
348     return false;
349   }
350   return config_value->value->IsTranslatable();
351 }
352 
353 }  // namespace
354 
Consume(IAaptContext * context,ResourceTable * table)355 bool PseudolocaleGenerator::Consume(IAaptContext* context, ResourceTable* table) {
356   for (auto& package : table->packages) {
357     for (auto& type : package->types) {
358       for (auto& entry : type->entries) {
359         std::vector<ResourceConfigValue*> values = entry->FindValuesIf(IsPseudolocalizable);
360         for (ResourceConfigValue* value : values) {
361           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kAccent, value, &table->string_pool,
362                                  entry.get());
363           PseudolocalizeIfNeeded(Pseudolocalizer::Method::kBidi, value, &table->string_pool,
364                                  entry.get());
365         }
366       }
367     }
368   }
369   return true;
370 }
371 
372 }  // namespace aapt
373