• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <array>
18 #include <cstdint>
19 #include <cstdlib>
20 #include <cstring>
21 #include <string>
22 #include <unordered_map>
23 #include <unordered_set>
24 
25 #include <androidfw/LocaleData.h>
26 #include <androidfw/LocaleDataLookup.h>
27 
28 namespace android {
29 
30 const uint32_t PACKED_ROOT = 0; // to represent the root locale
31 const uint32_t MAX_PARENT_DEPTH = getMaxAncestorTreeDepth();
32 
findParent(uint32_t packed_locale,const char * script)33 uint32_t findParent(uint32_t packed_locale, const char* script) {
34     if (hasRegion(packed_locale)) {
35         auto parent_key = findParentLocalePackedKey(script, packed_locale);
36         if (parent_key != 0) {
37             return parent_key;
38         }
39         return dropRegion(packed_locale);
40     }
41     return PACKED_ROOT;
42 }
43 
44 // Find the ancestors of a locale, and fill 'out' with it (assumes out has enough
45 // space). If any of the members of stop_list was seen, write it in the
46 // output but stop afterwards.
47 //
48 // This also outputs the index of the last written ancestor in the stop_list
49 // to stop_list_index, which will be -1 if it is not found in the stop_list.
50 //
51 // Returns the number of ancestors written in the output, which is always
52 // at least one.
53 //
54 // (If 'out' is nullptr, we do everything the same way but we simply don't write
55 // any results in 'out'.)
findAncestors(uint32_t * out,ssize_t * stop_list_index,uint32_t packed_locale,const char * script,const uint32_t * stop_list,size_t stop_set_length)56 size_t findAncestors(uint32_t* out, ssize_t* stop_list_index,
57                      uint32_t packed_locale, const char* script,
58                      const uint32_t* stop_list, size_t stop_set_length) {
59     uint32_t ancestor = packed_locale;
60     size_t count = 0;
61     do {
62         if (out != nullptr) out[count] = ancestor;
63         count++;
64         for (size_t i = 0; i < stop_set_length; i++) {
65             if (stop_list[i] == ancestor) {
66                 *stop_list_index = (ssize_t) i;
67                 return count;
68             }
69         }
70         ancestor = findParent(ancestor, script);
71     } while (ancestor != PACKED_ROOT);
72     *stop_list_index = (ssize_t) -1;
73     return count;
74 }
75 
findDistance(uint32_t supported,const char * script,const uint32_t * request_ancestors,size_t request_ancestors_count)76 size_t findDistance(uint32_t supported,
77                     const char* script,
78                     const uint32_t* request_ancestors,
79                     size_t request_ancestors_count) {
80     ssize_t request_ancestors_index;
81     const size_t supported_ancestor_count = findAncestors(
82             nullptr, &request_ancestors_index,
83             supported, script,
84             request_ancestors, request_ancestors_count);
85     // Since both locales share the same root, there will always be a shared
86     // ancestor, so the distance in the parent tree is the sum of the distance
87     // of 'supported' to the lowest common ancestor (number of ancestors
88     // written for 'supported' minus 1) plus the distance of 'request' to the
89     // lowest common ancestor (the index of the ancestor in request_ancestors).
90     return supported_ancestor_count + request_ancestors_index - 1;
91 }
92 
// Packed language+region keys for the Spanish variants that get special
// treatment when comparing regions.
const uint32_t US_SPANISH = 0x65735553LU; // es-US
const uint32_t MEXICAN_SPANISH = 0x65734D58LU; // es-MX
const uint32_t LATIN_AMERICAN_SPANISH = 0x6573A424LU; // es-419

// es-US and es-MX act as stand-ins for es-419: when es-419 itself is not
// available they are considered equivalent to it. Returns true when the given
// packed locale is one of those two.
inline bool isSpecialSpanish(uint32_t language_and_region) {
    switch (language_and_region) {
        case US_SPANISH:
        case MEXICAN_SPANISH:
            return true;
        default:
            return false;
    }
}
102 
localeDataCompareRegions(const char * left_region,const char * right_region,const char * requested_language,const char * requested_script,const char * requested_region)103 int localeDataCompareRegions(
104         const char* left_region, const char* right_region,
105         const char* requested_language, const char* requested_script,
106         const char* requested_region) {
107 
108     if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) {
109         return 0;
110     }
111     uint32_t left = packLocale(requested_language, left_region);
112     uint32_t right = packLocale(requested_language, right_region);
113     const uint32_t request = packLocale(requested_language, requested_region);
114 
115     // If one and only one of the two locales is a special Spanish locale, we
116     // replace it with es-419. We don't do the replacement if the other locale
117     // is already es-419, or both locales are special Spanish locales (when
118     // es-US is being compared to es-MX).
119     const bool leftIsSpecialSpanish = isSpecialSpanish(left);
120     const bool rightIsSpecialSpanish = isSpecialSpanish(right);
121     if (leftIsSpecialSpanish && !rightIsSpecialSpanish && right != LATIN_AMERICAN_SPANISH) {
122         left = LATIN_AMERICAN_SPANISH;
123     } else if (rightIsSpecialSpanish && !leftIsSpecialSpanish && left != LATIN_AMERICAN_SPANISH) {
124         right = LATIN_AMERICAN_SPANISH;
125     }
126 
127     uint32_t request_ancestors[MAX_PARENT_DEPTH+1];
128     ssize_t left_right_index;
129     // Find the parents of the request, but stop as soon as we saw left or right
130     const std::array<uint32_t, 2> left_and_right = {{left, right}};
131     const size_t ancestor_count = findAncestors(
132             request_ancestors, &left_right_index,
133             request, requested_script,
134             left_and_right.data(), left_and_right.size());
135     if (left_right_index == 0) { // We saw left earlier
136         return 1;
137     }
138     if (left_right_index == 1) { // We saw right earlier
139         return -1;
140     }
141 
142     // If we are here, neither left nor right are an ancestor of the
143     // request. This means that all the ancestors have been computed and
144     // the last ancestor is just the language by itself. We will use the
145     // distance in the parent tree for determining the better match.
146     const size_t left_distance = findDistance(
147             left, requested_script, request_ancestors, ancestor_count);
148     const size_t right_distance = findDistance(
149             right, requested_script, request_ancestors, ancestor_count);
150     if (left_distance != right_distance) {
151         return (int) right_distance - (int) left_distance; // smaller distance is better
152     }
153 
154     // If we are here, left and right are equidistant from the request. We will
155     // try and see if any of them is a representative locale.
156     const bool left_is_representative = isLocaleRepresentative(left, requested_script);
157     const bool right_is_representative = isLocaleRepresentative(right, requested_script);
158     if (left_is_representative != right_is_representative) {
159         return (int) left_is_representative - (int) right_is_representative;
160     }
161 
162     // We have no way of figuring out which locale is a better match. For
163     // the sake of stability, we consider the locale with the lower region
164     // code (in dictionary order) better, with two-letter codes before
165     // three-digit codes (since two-letter codes are more specific).
166     return (int64_t) right - (int64_t) left;
167 }
168 
localeDataComputeScript(char out[4],const char * language,const char * region)169 void localeDataComputeScript(char out[4], const char* language, const char* region) {
170     if (language[0] == '\0') {
171         memset(out, '\0', SCRIPT_LENGTH);
172         return;
173     }
174     uint32_t lookup_key = packLocale(language, region);
175     auto lookup_result = lookupLikelyScript(lookup_key);
176     if (lookup_result == nullptr) {
177         // We couldn't find the locale. Let's try without the region
178         if (region[0] != '\0') {
179             lookup_key = dropRegion(lookup_key);
180             lookup_result = lookupLikelyScript(lookup_key);
181             if (lookup_result != nullptr) {
182                 memcpy(out, lookup_result, SCRIPT_LENGTH);
183                 return;
184             }
185         }
186         // We don't know anything about the locale
187         memset(out, '\0', SCRIPT_LENGTH);
188         return;
189     } else {
190         // We found the locale.
191         memcpy(out, lookup_result, SCRIPT_LENGTH);
192     }
193 }
194 
195 const uint32_t ENGLISH_STOP_LIST[2] = {
196     0x656E0000LU, // en
197     0x656E8400LU, // en-001
198 };
199 const char ENGLISH_CHARS[2] = {'e', 'n'};
200 const char LATIN_CHARS[4] = {'L', 'a', 't', 'n'};
201 
localeDataIsCloseToUsEnglish(const char * region)202 bool localeDataIsCloseToUsEnglish(const char* region) {
203     const uint32_t locale = packLocale(ENGLISH_CHARS, region);
204     ssize_t stop_list_index;
205     findAncestors(nullptr, &stop_list_index, locale, LATIN_CHARS, ENGLISH_STOP_LIST, 2);
206     // A locale is like US English if we see "en" before "en-001" in its ancestor list.
207     return stop_list_index == 0; // 'en' is first in ENGLISH_STOP_LIST
208 }
209 
210 } // namespace android
211