• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "util/Util.h"

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <ostream>
#include <string>
#include <vector>

#include "android-base/stringprintf.h"
#include "androidfw/StringPiece.h"
#include "build/version.h"

#include "text/Unicode.h"
#include "text/Utf8Iterator.h"
#include "util/BigBuffer.h"
#include "util/Maybe.h"
#include "utils/Unicode.h"

using ::aapt::text::Utf8Iterator;
using ::android::StringPiece;
using ::android::StringPiece16;
37 
38 namespace aapt {
39 namespace util {
40 
// Package names and shared user ids are used as part of a file name, so their length is capped.
// The limit is 223 characters, which reserves 32 characters for the OS.
// See frameworks/base/core/java/android/content/pm/parsing/ParsingPackageUtils.java
static constexpr size_t kMaxPackageNameSize = 223;
45 
SplitAndTransform(const StringPiece & str,char sep,const std::function<char (char)> & f)46 static std::vector<std::string> SplitAndTransform(
47     const StringPiece& str, char sep, const std::function<char(char)>& f) {
48   std::vector<std::string> parts;
49   const StringPiece::const_iterator end = std::end(str);
50   StringPiece::const_iterator start = std::begin(str);
51   StringPiece::const_iterator current;
52   do {
53     current = std::find(start, end, sep);
54     parts.emplace_back(str.substr(start, current).to_string());
55     if (f) {
56       std::string& part = parts.back();
57       std::transform(part.begin(), part.end(), part.begin(), f);
58     }
59     start = current + 1;
60   } while (current != end);
61   return parts;
62 }
63 
Split(const StringPiece & str,char sep)64 std::vector<std::string> Split(const StringPiece& str, char sep) {
65   return SplitAndTransform(str, sep, nullptr);
66 }
67 
SplitAndLowercase(const StringPiece & str,char sep)68 std::vector<std::string> SplitAndLowercase(const StringPiece& str, char sep) {
69   return SplitAndTransform(str, sep, ::tolower);
70 }
71 
StartsWith(const StringPiece & str,const StringPiece & prefix)72 bool StartsWith(const StringPiece& str, const StringPiece& prefix) {
73   if (str.size() < prefix.size()) {
74     return false;
75   }
76   return str.substr(0, prefix.size()) == prefix;
77 }
78 
EndsWith(const StringPiece & str,const StringPiece & suffix)79 bool EndsWith(const StringPiece& str, const StringPiece& suffix) {
80   if (str.size() < suffix.size()) {
81     return false;
82   }
83   return str.substr(str.size() - suffix.size(), suffix.size()) == suffix;
84 }
85 
// Returns a view of `str` with all leading whitespace (as classified by isspace()) removed.
// The result points into the same buffer as `str`; empty or null input is returned unchanged.
StringPiece TrimLeadingWhitespace(const StringPiece& str) {
  if (str.size() == 0 || str.data() == nullptr) {
    return str;
  }

  const char* start = str.data();
  const char* const end = start + str.length();

  // isspace() requires a value representable as unsigned char, so convert each byte first.
  while (start != end && isspace(static_cast<unsigned char>(*start))) {
    ++start;
  }
  return StringPiece(start, end - start);
}
99 
// Returns a view of `str` with all trailing whitespace (as classified by isspace()) removed.
// The result points into the same buffer as `str`; empty or null input is returned unchanged.
StringPiece TrimTrailingWhitespace(const StringPiece& str) {
  if (str.size() == 0 || str.data() == nullptr) {
    return str;
  }

  const char* const start = str.data();
  const char* end = start + str.length();

  // isspace() requires a value representable as unsigned char, so convert each byte first.
  while (end != start && isspace(static_cast<unsigned char>(*(end - 1)))) {
    --end;
  }
  return StringPiece(start, end - start);
}
113 
// Returns a view of `str` with leading and trailing whitespace (as classified by isspace())
// removed. The result points into the same buffer as `str`; empty or null input is returned
// unchanged.
StringPiece TrimWhitespace(const StringPiece& str) {
  if (str.size() == 0 || str.data() == nullptr) {
    return str;
  }

  const char* start = str.data();
  const char* end = str.data() + str.length();

  // isspace() requires a value representable as unsigned char, so convert each byte first.
  while (start != end && isspace(static_cast<unsigned char>(*start))) {
    ++start;
  }

  while (end != start && isspace(static_cast<unsigned char>(*(end - 1)))) {
    --end;
  }

  return StringPiece(start, end - start);
}
132 
IsJavaNameImpl(const StringPiece & str)133 static int IsJavaNameImpl(const StringPiece& str) {
134   int pieces = 0;
135   for (const StringPiece& piece : Tokenize(str, '.')) {
136     pieces++;
137     if (!text::IsJavaIdentifier(piece)) {
138       return -1;
139     }
140   }
141   return pieces;
142 }
143 
IsJavaClassName(const StringPiece & str)144 bool IsJavaClassName(const StringPiece& str) {
145   return IsJavaNameImpl(str) >= 2;
146 }
147 
IsJavaPackageName(const StringPiece & str)148 bool IsJavaPackageName(const StringPiece& str) {
149   return IsJavaNameImpl(str) >= 1;
150 }
151 
IsAndroidNameImpl(const StringPiece & str)152 static int IsAndroidNameImpl(const StringPiece& str) {
153   int pieces = 0;
154   for (const StringPiece& piece : Tokenize(str, '.')) {
155     if (piece.empty()) {
156       return -1;
157     }
158 
159     const char first_character = piece.data()[0];
160     if (!::isalpha(first_character)) {
161       return -1;
162     }
163 
164     bool valid = std::all_of(piece.begin() + 1, piece.end(), [](const char c) -> bool {
165       return ::isalnum(c) || c == '_';
166     });
167 
168     if (!valid) {
169       return -1;
170     }
171     pieces++;
172   }
173   return pieces;
174 }
175 
IsAndroidPackageName(const StringPiece & str)176 bool IsAndroidPackageName(const StringPiece& str) {
177   if (str.size() > kMaxPackageNameSize) {
178     return false;
179   }
180   return IsAndroidNameImpl(str) > 1 || str == "android";
181 }
182 
IsAndroidSharedUserId(const android::StringPiece & package_name,const android::StringPiece & shared_user_id)183 bool IsAndroidSharedUserId(const android::StringPiece& package_name,
184                            const android::StringPiece& shared_user_id) {
185   if (shared_user_id.size() > kMaxPackageNameSize) {
186     return false;
187   }
188   return shared_user_id.empty() || IsAndroidNameImpl(shared_user_id) > 1 ||
189          package_name == "android";
190 }
191 
IsAndroidSplitName(const StringPiece & str)192 bool IsAndroidSplitName(const StringPiece& str) {
193   return IsAndroidNameImpl(str) > 0;
194 }
195 
GetFullyQualifiedClassName(const StringPiece & package,const StringPiece & classname)196 Maybe<std::string> GetFullyQualifiedClassName(const StringPiece& package,
197                                               const StringPiece& classname) {
198   if (classname.empty()) {
199     return {};
200   }
201 
202   if (util::IsJavaClassName(classname)) {
203     return classname.to_string();
204   }
205 
206   if (package.empty()) {
207     return {};
208   }
209 
210   std::string result = package.to_string();
211   if (classname.data()[0] != '.') {
212     result += '.';
213   }
214 
215   result.append(classname.data(), classname.size());
216   if (!IsJavaClassName(result)) {
217     return {};
218   }
219   return result;
220 }
221 
// Returns the human-readable name of this tool. The returned pointer refers to a string literal.
const char* GetToolName() {
  static const char* const sToolName = "Android Asset Packaging Tool (aapt)";
  return sToolName;
}
226 
GetToolFingerprint()227 std::string GetToolFingerprint() {
228   // DO NOT UPDATE, this is more of a marketing version.
229   static const char* const sMajorVersion = "2";
230 
231   // Update minor version whenever a feature or flag is added.
232   static const char* const sMinorVersion = "19";
233 
234   // The build id of aapt2 binary.
235   static const std::string sBuildId = android::build::GetBuildNumber();
236 
237   return android::base::StringPrintf("%s.%s-%s", sMajorVersion, sMinorVersion, sBuildId.c_str());
238 }
239 
// Returns the number of consecutive ASCII decimal digits ('0'..'9') at the beginning of the
// range [start, end). Returns 0 for an empty range or when the first character is not a digit.
static size_t ConsumeDigits(const char* start, const char* end) {
  const char* c = start;
  while (c != end && *c >= '0' && *c <= '9') {
    ++c;
  }
  return static_cast<size_t>(c - start);
}
246 
VerifyJavaStringFormat(const StringPiece & str)247 bool VerifyJavaStringFormat(const StringPiece& str) {
248   const char* c = str.begin();
249   const char* const end = str.end();
250 
251   size_t arg_count = 0;
252   bool nonpositional = false;
253   while (c != end) {
254     if (*c == '%' && c + 1 < end) {
255       c++;
256 
257       if (*c == '%' || *c == 'n') {
258         c++;
259         continue;
260       }
261 
262       arg_count++;
263 
264       size_t num_digits = ConsumeDigits(c, end);
265       if (num_digits > 0) {
266         c += num_digits;
267         if (c != end && *c != '$') {
268           // The digits were a size, but not a positional argument.
269           nonpositional = true;
270         }
271       } else if (*c == '<') {
272         // Reusing last argument, bad idea since positions can be moved around
273         // during translation.
274         nonpositional = true;
275 
276         c++;
277 
278         // Optionally we can have a $ after
279         if (c != end && *c == '$') {
280           c++;
281         }
282       } else {
283         nonpositional = true;
284       }
285 
286       // Ignore size, width, flags, etc.
287       while (c != end && (*c == '-' || *c == '#' || *c == '+' || *c == ' ' ||
288                           *c == ',' || *c == '(' || (*c >= '0' && *c <= '9'))) {
289         c++;
290       }
291 
292       /*
293        * This is a shortcut to detect strings that are going to Time.format()
294        * instead of String.format()
295        *
296        * Comparison of String.format() and Time.format() args:
297        *
298        * String: ABC E GH  ST X abcdefgh  nost x
299        *   Time:    DEFGHKMS W Za  d   hkm  s w yz
300        *
301        * Therefore we know it's definitely Time if we have:
302        *     DFKMWZkmwyz
303        */
304       if (c != end) {
305         switch (*c) {
306           case 'D':
307           case 'F':
308           case 'K':
309           case 'M':
310           case 'W':
311           case 'Z':
312           case 'k':
313           case 'm':
314           case 'w':
315           case 'y':
316           case 'z':
317             return true;
318         }
319       }
320     }
321 
322     if (c != end) {
323       c++;
324     }
325   }
326 
327   if (arg_count > 1 && nonpositional) {
328     // Multiple arguments were specified, but some or all were non positional.
329     // Translated
330     // strings may rearrange the order of the arguments, which will break the
331     // string.
332     return false;
333   }
334   return true;
335 }
336 
Utf8ToModifiedUtf8(const std::string & utf8)337 std::string Utf8ToModifiedUtf8(const std::string& utf8) {
338   // Java uses Modified UTF-8 which only supports the 1, 2, and 3 byte formats of UTF-8. To encode
339   // 4 byte UTF-8 codepoints, Modified UTF-8 allows the use of surrogate pairs in the same format
340   // of CESU-8 surrogate pairs. Calculate the size of the utf8 string with all 4 byte UTF-8
341   // codepoints replaced with 2 3 byte surrogate pairs
342   size_t modified_size = 0;
343   const size_t size = utf8.size();
344   for (size_t i = 0; i < size; i++) {
345     if (((uint8_t) utf8[i] >> 4) == 0xF) {
346       modified_size += 6;
347       i += 3;
348     } else {
349       modified_size++;
350     }
351   }
352 
353   // Early out if no 4 byte codepoints are found
354   if (size == modified_size) {
355     return utf8;
356   }
357 
358   std::string output;
359   output.reserve(modified_size);
360   for (size_t i = 0; i < size; i++) {
361     if (((uint8_t) utf8[i] >> 4) == 0xF) {
362       int32_t codepoint = utf32_from_utf8_at(utf8.data(), size, i, nullptr);
363 
364       // Calculate the high and low surrogates as UTF-16 would
365       int32_t high = ((codepoint - 0x10000) / 0x400) + 0xD800;
366       int32_t low = ((codepoint - 0x10000) % 0x400) + 0xDC00;
367 
368       // Encode each surrogate in UTF-8
369       output.push_back((char) (0xE4 | ((high >> 12) & 0xF)));
370       output.push_back((char) (0x80 | ((high >> 6) & 0x3F)));
371       output.push_back((char) (0x80 | (high & 0x3F)));
372       output.push_back((char) (0xE4 | ((low >> 12) & 0xF)));
373       output.push_back((char) (0x80 | ((low >> 6) & 0x3F)));
374       output.push_back((char) (0x80 | (low & 0x3F)));
375       i += 3;
376     } else {
377       output.push_back(utf8[i]);
378     }
379   }
380 
381   return output;
382 }
383 
ModifiedUtf8ToUtf8(const std::string & modified_utf8)384 std::string ModifiedUtf8ToUtf8(const std::string& modified_utf8) {
385   // The UTF-8 representation will have a byte length less than or equal to the Modified UTF-8
386   // representation.
387   std::string output;
388   output.reserve(modified_utf8.size());
389 
390   size_t index = 0;
391   const size_t modified_size = modified_utf8.size();
392   while (index < modified_size) {
393     size_t next_index;
394     int32_t high_surrogate = utf32_from_utf8_at(modified_utf8.data(), modified_size, index,
395                                                 &next_index);
396     if (high_surrogate < 0) {
397       return {};
398     }
399 
400     // Check that the first codepoint is within the high surrogate range
401     if (high_surrogate >= 0xD800 && high_surrogate <= 0xDB7F) {
402       int32_t low_surrogate = utf32_from_utf8_at(modified_utf8.data(), modified_size, next_index,
403                                                  &next_index);
404       if (low_surrogate < 0) {
405         return {};
406       }
407 
408       // Check that the second codepoint is within the low surrogate range
409       if (low_surrogate >= 0xDC00 && low_surrogate <= 0xDFFF) {
410         const char32_t codepoint = (char32_t) (((high_surrogate - 0xD800) * 0x400)
411             + (low_surrogate - 0xDC00) + 0x10000);
412 
413         // The decoded codepoint should represent a 4 byte, UTF-8 character
414         const size_t utf8_length = (size_t) utf32_to_utf8_length(&codepoint, 1);
415         if (utf8_length != 4) {
416           return {};
417         }
418 
419         // Encode the UTF-8 representation of the codepoint into the string
420         char* start = &output[output.size()];
421         output.resize(output.size() + utf8_length);
422         utf32_to_utf8((char32_t*) &codepoint, 1, start, utf8_length + 1);
423 
424         index = next_index;
425         continue;
426       }
427     }
428 
429     // Append non-surrogate pairs to the output string
430     for (size_t i = index; i < next_index; i++) {
431       output.push_back(modified_utf8[i]);
432     }
433     index = next_index;
434   }
435   return output;
436 }
437 
Utf8ToUtf16(const StringPiece & utf8)438 std::u16string Utf8ToUtf16(const StringPiece& utf8) {
439   ssize_t utf16_length = utf8_to_utf16_length(
440       reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length());
441   if (utf16_length <= 0) {
442     return {};
443   }
444 
445   std::u16string utf16;
446   utf16.resize(utf16_length);
447   utf8_to_utf16(reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length(),
448                 &*utf16.begin(), utf16_length + 1);
449   return utf16;
450 }
451 
Utf16ToUtf8(const StringPiece16 & utf16)452 std::string Utf16ToUtf8(const StringPiece16& utf16) {
453   ssize_t utf8_length = utf16_to_utf8_length(utf16.data(), utf16.length());
454   if (utf8_length <= 0) {
455     return {};
456   }
457 
458   std::string utf8;
459   utf8.resize(utf8_length);
460   utf16_to_utf8(utf16.data(), utf16.length(), &*utf8.begin(), utf8_length + 1);
461   return utf8;
462 }
463 
WriteAll(std::ostream & out,const BigBuffer & buffer)464 bool WriteAll(std::ostream& out, const BigBuffer& buffer) {
465   for (const auto& b : buffer) {
466     if (!out.write(reinterpret_cast<const char*>(b.buffer.get()), b.size)) {
467       return false;
468     }
469   }
470   return true;
471 }
472 
Copy(const BigBuffer & buffer)473 std::unique_ptr<uint8_t[]> Copy(const BigBuffer& buffer) {
474   std::unique_ptr<uint8_t[]> data =
475       std::unique_ptr<uint8_t[]>(new uint8_t[buffer.size()]);
476   uint8_t* p = data.get();
477   for (const auto& block : buffer) {
478     memcpy(p, block.buffer.get(), block.size);
479     p += block.size;
480   }
481   return data;
482 }
483 
operator ++()484 typename Tokenizer::iterator& Tokenizer::iterator::operator++() {
485   const char* start = token_.end();
486   const char* end = str_.end();
487   if (start == end) {
488     end_ = true;
489     token_.assign(token_.end(), 0);
490     return *this;
491   }
492 
493   start += 1;
494   const char* current = start;
495   while (current != end) {
496     if (*current == separator_) {
497       token_.assign(start, current - start);
498       return *this;
499     }
500     ++current;
501   }
502   token_.assign(start, end - start);
503   return *this;
504 }
505 
operator ==(const iterator & rhs) const506 bool Tokenizer::iterator::operator==(const iterator& rhs) const {
507   // We check equality here a bit differently.
508   // We need to know that the addresses are the same.
509   return token_.begin() == rhs.token_.begin() &&
510          token_.end() == rhs.token_.end() && end_ == rhs.end_;
511 }
512 
operator !=(const iterator & rhs) const513 bool Tokenizer::iterator::operator!=(const iterator& rhs) const {
514   return !(*this == rhs);
515 }
516 
iterator(const StringPiece & s,char sep,const StringPiece & tok,bool end)517 Tokenizer::iterator::iterator(const StringPiece& s, char sep, const StringPiece& tok, bool end)
518     : str_(s), separator_(sep), token_(tok), end_(end) {}
519 
Tokenizer(const StringPiece & str,char sep)520 Tokenizer::Tokenizer(const StringPiece& str, char sep)
521     : begin_(++iterator(str, sep, StringPiece(str.begin() - 1, 0), false)),
522       end_(str, sep, StringPiece(str.end(), 0), true) {}
523 
ExtractResFilePathParts(const StringPiece & path,StringPiece * out_prefix,StringPiece * out_entry,StringPiece * out_suffix)524 bool ExtractResFilePathParts(const StringPiece& path, StringPiece* out_prefix,
525                              StringPiece* out_entry, StringPiece* out_suffix) {
526   const StringPiece res_prefix("res/");
527   if (!StartsWith(path, res_prefix)) {
528     return false;
529   }
530 
531   StringPiece::const_iterator last_occurence = path.end();
532   for (auto iter = path.begin() + res_prefix.size(); iter != path.end();
533        ++iter) {
534     if (*iter == '/') {
535       last_occurence = iter;
536     }
537   }
538 
539   if (last_occurence == path.end()) {
540     return false;
541   }
542 
543   auto iter = std::find(last_occurence, path.end(), '.');
544   *out_suffix = StringPiece(iter, path.end() - iter);
545   *out_entry = StringPiece(last_occurence + 1, iter - last_occurence - 1);
546   *out_prefix = StringPiece(path.begin(), last_occurence - path.begin() + 1);
547   return true;
548 }
549 
GetString16(const android::ResStringPool & pool,size_t idx)550 StringPiece16 GetString16(const android::ResStringPool& pool, size_t idx) {
551   if (auto str = pool.stringAt(idx); str.ok()) {
552     return *str;
553   }
554   return StringPiece16();
555 }
556 
GetString(const android::ResStringPool & pool,size_t idx)557 std::string GetString(const android::ResStringPool& pool, size_t idx) {
558   if (auto str = pool.string8At(idx); str.ok()) {
559     return ModifiedUtf8ToUtf8(str->to_string());
560   }
561   return Utf16ToUtf8(GetString16(pool, idx));
562 }
563 
564 }  // namespace util
565 }  // namespace aapt
566