/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "compile/Pseudolocalizer.h"

#include <cctype>
#include <string>

#include "util/Util.h"

using android::StringPiece;

namespace aapt {

// Filler text appended by the accent method to lengthen a message.
// NOTE(review): the spelling ("fiveteen") and the word list are emitted
// verbatim into generated output, so they are deliberately left untouched.
static const std::string kExpansionString =
    "one two three "
    "four five six seven eight nine ten eleven twelve thirteen "
    "fourteen fiveteen sixteen seventeen nineteen twenty";

// Special unicode characters to override directionality of the words
static const std::string kRlm = "\u200f";
static const std::string kRlo = "\u202e";
static const std::string kPdf = "\u202c";

// Placeholder marks
static const std::string kPlaceholderOpen = "\u00bb";
static const std::string kPlaceholderClose = "\u00ab";

// Delimiters that open and close a nesting level in Pseudolocalizer::Text().
static const char kArgStart = '{';
static const char kArgEnd = '}';

// Identity method: returns both text and placeholders unchanged.
class PseudoMethodNone : public PseudoMethodImpl {
 public:
  std::string Text(const StringPiece& text) override {
    return text.to_string();
  }
  std::string Placeholder(const StringPiece& text) override {
    return text.to_string();
  }
};

// Wraps every word and every placeholder in directionality override marks.
class PseudoMethodBidi : public PseudoMethodImpl {
 public:
  std::string Text(const StringPiece& text) override;
  std::string Placeholder(const StringPiece& text) override;
};

// Replaces characters with accented look-alikes, brackets the outermost
// message and appends filler words proportional to the text seen.
class PseudoMethodAccent : public PseudoMethodImpl {
 public:
  PseudoMethodAccent() : depth_(0), word_count_(0), length_(0) {}
  std::string Start() override;
  std::string End() override;
  std::string Text(const StringPiece& text) override;
  std::string Placeholder(const StringPiece& text) override;

 private:
  // Number of Start() calls not yet matched by End().
  size_t depth_;
  // Words counted by Text() since the last Start()/End().
  size_t word_count_;
  // Characters counted by Text() since the last Start()/End().
  size_t length_;
};

Pseudolocalizer::Pseudolocalizer(Method method) : last_depth_(0) {
  SetMethod(method);
}

// Selects the implementation that Text() delegates to.
void Pseudolocalizer::SetMethod(Method method) {
  switch (method) {
    case Method::kNone:
      impl_ = util::make_unique<PseudoMethodNone>();
      break;
    case Method::kAccent:
      impl_ = util::make_unique<PseudoMethodAccent>();
      break;
    case Method::kBidi:
      impl_ = util::make_unique<PseudoMethodBidi>();
      break;
  }
}

/**
 * Pseudolocalizes `text`, splitting it into chunks at every change of brace
 * nesting depth.
 *
 * Chunks seen at an even depth are passed to the active method's Text();
 * chunks at an odd depth are copied verbatim, except that a chunk which is
 * exactly one `{...}` group goes through Placeholder(). The depth reached at
 * the end of the call is kept in last_depth_ and is the starting depth of the
 * next call.
 *
 * A single quote hides the character that follows it: neither of the two can
 * change the nesting depth.
 */
std::string Pseudolocalizer::Text(const StringPiece& text) {
  std::string out;
  size_t depth = last_depth_;
  size_t lastpos = 0;
  const size_t length = text.size();
  const char* str = text.data();
  bool escaped = false;
  for (size_t pos = 0; pos < length; pos++) {
    const char c = str[pos];
    // Quote handling must not skip the end-of-input check below; otherwise
    // input whose final character is a quote, or follows an unpaired quote,
    // would lose its unflushed tail.
    if (escaped) {
      escaped = false;
    } else if (c == '\'') {
      escaped = true;
    } else if (c == kArgStart) {
      depth++;
    } else if (c == kArgEnd && depth) {
      depth--;
    }

    if (last_depth_ != depth || pos == length - 1) {
      const bool pseudo = ((last_depth_ % 2) == 0);
      // When a pseudolocalized run ends because of a brace, the brace itself
      // is left for the following chunk.
      size_t nextpos = pos;
      if (!pseudo || depth == last_depth_) {
        nextpos++;
      }
      const size_t size = nextpos - lastpos;
      if (size) {
        std::string chunk = text.substr(lastpos, size).to_string();
        if (pseudo) {
          chunk = impl_->Text(chunk);
        } else if (str[lastpos] == kArgStart && str[nextpos - 1] == kArgEnd) {
          chunk = impl_->Placeholder(chunk);
        }
        out.append(chunk);
      }
      if (pseudo && depth < last_depth_) {  // End of message
        out.append(impl_->End());
      } else if (!pseudo && depth > last_depth_) {  // Start of message
        out.append(impl_->Start());
      }
      lastpos = nextpos;
      last_depth_ = depth;
    }
  }
  return out;
}

// Returns the accented replacement for `c` as a string literal, or nullptr
// when `c` has no replacement.
// NOTE(review): there is no entry for 'F'; confirm whether that is intended.
static const char* PseudolocalizeChar(const char c) {
  switch (c) {
    case 'a':
      return "\u00e5";
    case 'b':
      return "\u0253";
    case 'c':
      return "\u00e7";
    case 'd':
      return "\u00f0";
    case 'e':
      return "\u00e9";
    case 'f':
      return "\u0192";
    case 'g':
      return "\u011d";
    case 'h':
      return "\u0125";
    case 'i':
      return "\u00ee";
    case 'j':
      return "\u0135";
    case 'k':
      return "\u0137";
    case 'l':
      return "\u013c";
    case 'm':
      return "\u1e3f";
    case 'n':
      return "\u00f1";
    case 'o':
      return "\u00f6";
    case 'p':
      return "\u00fe";
    case 'q':
      return "\u0051";
    case 'r':
      return "\u0155";
    case 's':
      return "\u0161";
    case 't':
      return "\u0163";
    case 'u':
      return "\u00fb";
    case 'v':
      return "\u0056";
    case 'w':
      return "\u0175";
    case 'x':
      return "\u0445";
    case 'y':
      return "\u00fd";
    case 'z':
      return "\u017e";
    case 'A':
      return "\u00c5";
    case 'B':
      return "\u03b2";
    case 'C':
      return "\u00c7";
    case 'D':
      return "\u00d0";
    case 'E':
      return "\u00c9";
    case 'G':
      return "\u011c";
    case 'H':
      return "\u0124";
    case 'I':
      return "\u00ce";
    case 'J':
      return "\u0134";
    case 'K':
      return "\u0136";
    case 'L':
      return "\u013b";
    case 'M':
      return "\u1e3e";
    case 'N':
      return "\u00d1";
    case 'O':
      return "\u00d6";
    case 'P':
      return "\u00de";
    case 'Q':
      return "\u0071";
    case 'R':
      return "\u0154";
    case 'S':
      return "\u0160";
    case 'T':
      return "\u0162";
    case 'U':
      return "\u00db";
    case 'V':
      return "\u03bd";
    case 'W':
      return "\u0174";
    case 'X':
      return "\u00d7";
    case 'Y':
      return "\u00dd";
    case 'Z':
      return "\u017d";
    case '!':
      return "\u00a1";
    case '?':
      return "\u00bf";
    case '$':
      return "\u20ac";
    default:
      return nullptr;
  }
}

// Returns true when `c` is one of the characters that terminates a
// %-placeholder in PseudoMethodAccent::Text().
static bool IsPossibleNormalPlaceholderEnd(const char c) {
  switch (c) {
    case 's':
    case 'S':
    case 'c':
    case 'C':
    case 'd':
    case 'o':
    case 'x':
    case 'X':
    case 'f':
    case 'e':
    case 'E':
    case 'g':
    case 'G':
    case 'a':
    case 'A':
    case 'b':
    case 'B':
    case 'h':
    case 'H':
    case '%':
    case 'n':
      return true;
    default:
      return false;
  }
}

// Builds filler text of roughly `length` characters out of kExpansionString.
// If one copy is too short, further copies are appended (separated by a
// space); otherwise the copy is cut at the first space found after `length`
// so that the result ends on a word boundary.
static std::string PseudoGenerateExpansion(const unsigned int length) {
  std::string result = kExpansionString;
  if (result.size() < length) {
    result += " ";
    result += PseudoGenerateExpansion(length - result.size());
  } else {
    int ext = 0;
    // Should contain only whole words, so looking for a space
    for (unsigned int i = length + 1; i < result.size(); ++i) {
      ++ext;
      if (result[i] == ' ') {
        break;
      }
    }
    result = result.substr(0, length + ext);
  }
  return result;
}

// Opens a message: emits "[" for the outermost message only and resets the
// word/length counters.
std::string PseudoMethodAccent::Start() {
  std::string result;
  if (depth_ == 0) {
    result = "[";
  }
  word_count_ = length_ = 0;
  depth_++;
  return result;
}

// Closes a message: appends filler text sized from the counters (the full
// counted length when more than three words were seen, half of it otherwise),
// resets the counters and emits "]" when the outermost message is closed.
std::string PseudoMethodAccent::End() {
  std::string result;
  if (length_) {
    result += " ";
    result += PseudoGenerateExpansion(word_count_ > 3 ? length_ : length_ / 2);
  }
  word_count_ = length_ = 0;
  depth_--;
  if (depth_ == 0) {
    result += "]";
  }
  return result;
}

/**
 * Converts characters so they look like they've been localized.
 *
 * Note: This leaves placeholder syntax untouched.
 *
 * Three kinds of input are recognised:
 *  - '%' starts a placeholder, copied verbatim and wrapped by Placeholder()
 *    unless it ends with '%';
 *  - '<' and '&' start markup, which is copied verbatim;
 *  - everything else is replaced via PseudolocalizeChar() where a mapping
 *    exists and counted towards length_ / word_count_.
 */
std::string PseudoMethodAccent::Text(const StringPiece& source) {
  const char* s = source.data();
  std::string result;
  const size_t I = source.size();
  bool lastspace = true;
  for (size_t i = 0; i < I; i++) {
    char c = s[i];
    if (c == '%') {
      // Placeholder syntax, no need to pseudolocalize
      std::string chunk;
      bool end = false;
      chunk.append(&c, 1);
      while (!end && i + 1 < I) {
        ++i;
        c = s[i];
        chunk.append(&c, 1);
        if (IsPossibleNormalPlaceholderEnd(c)) {
          end = true;
        } else if (i + 1 < I && c == 't') {
          // 't' takes one more character as part of the placeholder.
          ++i;
          c = s[i];
          chunk.append(&c, 1);
          end = true;
        }
      }
      // Treat chunk as a placeholder unless it ends with %.
      result += ((c == '%') ? chunk : Placeholder(chunk));
    } else if (c == '<' || c == '&') {
      // html syntax, no need to pseudolocalize
      bool tag_closed = false;
      while (!tag_closed && i < I) {
        if (c == '&') {
          std::string escape_text;
          escape_text.append(&c, 1);
          bool end = false;
          size_t html_code_pos = i;
          // Stop before the end of the input: `source` is not guaranteed to
          // be NUL-terminated, so s[I] must never be read.
          while (!end && html_code_pos + 1 < I) {
            ++html_code_pos;
            c = s[html_code_pos];
            escape_text.append(&c, 1);
            if (c == ';') {
              // Valid html code
              end = true;
              i = html_code_pos;
            } else if (!((c == '#' || (c >= 'a' && c <= 'z') ||
                          (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')))) {
              // Wrong html code
              // NOTE(review): `i` is not advanced here, so the characters
              // already copied into escape_text are visited again by the
              // outer loop and emitted a second time. Confirm intent.
              end = true;
            }
          }
          result += escape_text;
          // NOTE(review): escape_text always starts with '&', so it can never
          // equal "<" and this branch is always taken. The literal may have
          // been meant to be an escaped '<' entity; confirm before changing.
          if (escape_text != "<") {
            tag_closed = true;
          }
          continue;
        }
        if (c == '>') {
          tag_closed = true;
          result.append(&c, 1);
          continue;
        }
        result.append(&c, 1);
        i++;
        // An unterminated tag runs to the end of the input; do not read
        // past it.
        if (i < I) {
          c = s[i];
        }
      }
    } else {
      // This is a pure text that should be pseudolocalized
      const char* p = PseudolocalizeChar(c);
      if (p != nullptr) {
        result += p;
      } else {
        // Cast first: passing a negative char to isspace is undefined.
        const bool space = std::isspace(static_cast<unsigned char>(c)) != 0;
        if (lastspace && !space) {
          word_count_++;
        }
        lastspace = space;
        result.append(&c, 1);
      }
      // Count only pseudolocalizable chars and delimiters
      length_++;
    }
  }
  return result;
}

std::string PseudoMethodAccent::Placeholder(const StringPiece& source) {
  // Surround a placeholder with brackets
  return kPlaceholderOpen + source.to_string() + kPlaceholderClose;
}

/**
 * Wraps every word of `source` in directionality override marks.
 *
 * Whitespace, and the escape sequences backslash-n and backslash-t, separate
 * words. Any other character following a backslash is treated as part of a
 * word. Backslashes are copied through together with the character they
 * escape.
 */
std::string PseudoMethodBidi::Text(const StringPiece& source) {
  const char* s = source.data();
  std::string result;
  bool lastspace = true;
  bool space = true;
  bool escape = false;
  const char ESCAPE_CHAR = '\\';
  for (size_t i = 0; i < source.size(); i++) {
    const char c = s[i];
    if (!escape && c == ESCAPE_CHAR) {
      // NOTE(review): the backslash is only emitted together with the next
      // character, so a backslash at the very end of the input is dropped.
      escape = true;
      continue;
    }
    // Cast first: passing a negative char to isspace is undefined.
    space = (!escape && std::isspace(static_cast<unsigned char>(c)) != 0) ||
            (escape && (c == 'n' || c == 't'));
    if (lastspace && !space) {
      // Word start
      result += kRlm + kRlo;
    } else if (!lastspace && space) {
      // Word end
      result += kPdf + kRlm;
    }
    lastspace = space;
    if (escape) {
      result.append(&ESCAPE_CHAR, 1);
      escape = false;
    }
    result.append(&c, 1);
  }
  if (!lastspace) {
    // End of last word
    result += kPdf + kRlm;
  }
  return result;
}

std::string PseudoMethodBidi::Placeholder(const StringPiece& source) {
  // Surround a placeholder with directionality change sequence
  return kRlm + kRlo + source.to_string() + kPdf + kRlm;
}

}  // namespace aapt