1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "text/Unicode.h"
18
19 #include <algorithm>
20 #include <array>
21
22 #include "text/Utf8Iterator.h"
23
24 using ::android::StringPiece;
25
26 namespace aapt {
27 namespace text {
28
29 namespace {
30
// One row of the generated Unicode property table: an inclusive codepoint
// range [first_char, last_char] together with a bitmask of the properties
// that apply to every codepoint in that range.
//
// The member order is significant: keep it in sync with the initializers in
// the generated data table.
struct CharacterProperties {
  // Bit flags stored in `properties`.
  enum : uint32_t {
    kXidStart = 1u << 0,     // Codepoint may begin an identifier (XID_Start).
    kXidContinue = 1u << 1,  // Codepoint may continue an identifier (XID_Continue).
  };

  char32_t first_char;  // First codepoint covered by this entry (inclusive).
  char32_t last_char;   // Last codepoint covered by this entry (inclusive).
  uint32_t properties;  // Bitwise OR of the flags above.
};
41
// Include the generated data table.
43 #include "text/Unicode_data.cpp"
44
CompareCharacterProperties(const CharacterProperties & a,char32_t codepoint)45 bool CompareCharacterProperties(const CharacterProperties& a, char32_t codepoint) {
46 return a.last_char < codepoint;
47 }
48
FindCharacterProperties(char32_t codepoint)49 uint32_t FindCharacterProperties(char32_t codepoint) {
50 const auto iter_end = sCharacterProperties.end();
51 const auto iter = std::lower_bound(sCharacterProperties.begin(), iter_end, codepoint,
52 CompareCharacterProperties);
53 if (iter != iter_end && codepoint >= iter->first_char) {
54 return iter->properties;
55 }
56 return 0u;
57 }
58
59 } // namespace
60
IsXidStart(char32_t codepoint)61 bool IsXidStart(char32_t codepoint) {
62 return FindCharacterProperties(codepoint) & CharacterProperties::kXidStart;
63 }
64
IsXidContinue(char32_t codepoint)65 bool IsXidContinue(char32_t codepoint) {
66 return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
67 }
68
// Returns true if `codepoint` has the Unicode White_Space property.
//
// The set is small, so it is hardcoded here rather than generated; the
// external/icu project doesn't list it as a data file to parse.
// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
bool IsWhitespace(char32_t codepoint) {
  // The two contiguous ranges: U+0009..U+000D and U+2000..U+200A.
  if (codepoint >= 0x0009 && codepoint <= 0x000d) {
    return true;
  }
  if (codepoint >= 0x2000 && codepoint <= 0x200a) {
    return true;
  }

  // The remaining isolated codepoints.
  switch (codepoint) {
    case 0x0020:
    case 0x0085:
    case 0x00a0:
    case 0x1680:
    case 0x2028:
    case 0x2029:
    case 0x202f:
    case 0x205f:
    case 0x3000:
      return true;
    default:
      return false;
  }
}
79
IsJavaIdentifier(const StringPiece & str)80 bool IsJavaIdentifier(const StringPiece& str) {
81 Utf8Iterator iter(str);
82
83 // Check the first character.
84 if (!iter.HasNext()) {
85 return false;
86 }
87
88 if (!IsXidStart(iter.Next())) {
89 return false;
90 }
91
92 while (iter.HasNext()) {
93 const char32_t codepoint = iter.Next();
94 if (!IsXidContinue(codepoint) && codepoint != U'$') {
95 return false;
96 }
97 }
98 return true;
99 }
100
IsValidResourceEntryName(const StringPiece & str)101 bool IsValidResourceEntryName(const StringPiece& str) {
102 Utf8Iterator iter(str);
103
104 // Check the first character.
105 if (!iter.HasNext()) {
106 return false;
107 }
108
109 // Resources are allowed to start with '_'
110 const char32_t first_codepoint = iter.Next();
111 if (!IsXidStart(first_codepoint) && first_codepoint != U'_') {
112 return false;
113 }
114
115 while (iter.HasNext()) {
116 const char32_t codepoint = iter.Next();
117 if (!IsXidContinue(codepoint) && codepoint != U'.' && codepoint != U'-') {
118 return false;
119 }
120 }
121 return true;
122 }
123
124 } // namespace text
125 } // namespace aapt
126