• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2014 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <libaddressinput/address_formatter.h>
16 
17 #include <libaddressinput/address_data.h>
18 #include <libaddressinput/address_field.h>
19 #include <libaddressinput/util/basictypes.h>
20 
21 #include <algorithm>
22 #include <cassert>
23 #include <cstddef>
24 #include <functional>
25 #include <string>
26 #include <vector>
27 
28 #include "format_element.h"
29 #include "language.h"
30 #include "region_data_constants.h"
31 #include "rule.h"
32 #include "util/cctype_tolower_equal.h"
33 
34 namespace i18n {
35 namespace addressinput {
36 
37 namespace {
38 
// Separators placed between address lines when they are joined into a single
// line of text.
const char kCommaSeparator[] = ", ";
const char kSpaceSeparator[] = " ";
const char kArabicCommaSeparator[] = "\xD8\x8C" " ";  /* "، " */

// Base languages whose address lines are joined with a plain space.
// The pointers themselves are const so the table cannot be modified at runtime.
const char* const kLanguagesThatUseSpace[] = {
  "th",
  "ko"
};

// Base languages whose address lines are joined without any separator.
const char* const kLanguagesThatHaveNoSeparator[] = {
  "ja",
  "zh"  // All Chinese variants.
};

// This data is based on CLDR, for languages that are in official use in some
// country, where Arabic is the most likely script tag.
// TODO: Consider supporting variants such as tr-Arab by detecting the script
// code.
const char* const kLanguagesThatUseAnArabicComma[] = {
  "ar",
  "az",
  "fa",
  "kk",
  "ku",
  "ky",
  "ps",
  "tg",
  "tk",
  "ur",
  "uz"
};
70 
GetLineSeparatorForLanguage(const std::string & language_tag)71 std::string GetLineSeparatorForLanguage(const std::string& language_tag) {
72   Language address_language(language_tag);
73 
74   // First deal with explicit script tags.
75   if (address_language.has_latin_script) {
76     return kCommaSeparator;
77   }
78 
79   // Now guess something appropriate based on the base language.
80   const std::string& base_language = address_language.base;
81   if (std::find_if(kLanguagesThatUseSpace,
82                    kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace),
83                    std::bind2nd(EqualToTolowerString(), base_language)) !=
84       kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace)) {
85     return kSpaceSeparator;
86   } else if (std::find_if(
87                  kLanguagesThatHaveNoSeparator,
88                  kLanguagesThatHaveNoSeparator +
89                      arraysize(kLanguagesThatHaveNoSeparator),
90                  std::bind2nd(EqualToTolowerString(), base_language)) !=
91              kLanguagesThatHaveNoSeparator +
92                  arraysize(kLanguagesThatHaveNoSeparator)) {
93     return "";
94   } else if (std::find_if(
95                  kLanguagesThatUseAnArabicComma,
96                  kLanguagesThatUseAnArabicComma +
97                      arraysize(kLanguagesThatUseAnArabicComma),
98                  std::bind2nd(EqualToTolowerString(), base_language)) !=
99              kLanguagesThatUseAnArabicComma +
100                  arraysize(kLanguagesThatUseAnArabicComma)) {
101     return kArabicCommaSeparator;
102   }
103   // Either the language is a Latin-script language, or no language was
104   // specified. In the latter case we still return ", " as the most common
105   // separator in use. In countries that don't use this, e.g. Thailand,
106   // addresses are often written in Latin script where this would still be
107   // appropriate, so this is a reasonable default in the absence of information.
108   return kCommaSeparator;
109 }
110 
CombineLinesForLanguage(const std::vector<std::string> & lines,const std::string & language_tag,std::string * line)111 void CombineLinesForLanguage(const std::vector<std::string>& lines,
112                              const std::string& language_tag,
113                              std::string* line) {
114   line->clear();
115   std::string separator = GetLineSeparatorForLanguage(language_tag);
116   for (std::vector<std::string>::const_iterator it = lines.begin();
117        it != lines.end();
118        ++it) {
119     if (it != lines.begin()) {
120       line->append(separator);
121     }
122     line->append(*it);
123   }
124 }
125 
126 }  // namespace
127 
GetFormattedNationalAddress(const AddressData & address_data,std::vector<std::string> * lines)128 void GetFormattedNationalAddress(
129     const AddressData& address_data, std::vector<std::string>* lines) {
130   assert(lines != NULL);
131   lines->clear();
132 
133   Rule rule;
134   rule.CopyFrom(Rule::GetDefault());
135   // TODO: Eventually, we should get the best rule for this country and
136   // language, rather than just for the country.
137   rule.ParseSerializedRule(RegionDataConstants::GetRegionData(
138       address_data.region_code));
139 
140   Language language(address_data.language_code);
141 
142   // If Latin-script rules are available and the |language_code| of this address
143   // is explicitly tagged as being Latin, then use the Latin-script formatting
144   // rules.
145   const std::vector<FormatElement>& format =
146       language.has_latin_script && !rule.GetLatinFormat().empty()
147           ? rule.GetLatinFormat()
148           : rule.GetFormat();
149 
150   // Address format without the unnecessary elements (based on which address
151   // fields are empty). We assume all literal strings that are not at the start
152   // or end of a line are separators, and therefore only relevant if the
153   // surrounding fields are filled in. This works with the data we have
154   // currently.
155   std::vector<FormatElement> pruned_format;
156   for (std::vector<FormatElement>::const_iterator
157        element_it = format.begin();
158        element_it != format.end();
159        ++element_it) {
160     // Always keep the newlines.
161     if (element_it->IsNewline() ||
162         // Always keep the non-empty address fields.
163         (element_it->IsField() &&
164          !address_data.IsFieldEmpty(element_it->GetField())) ||
165         // Only keep literals that satisfy these 2 conditions:
166         (!element_it->IsField() &&
167          // (1) Not preceding an empty field.
168          (element_it + 1 == format.end() ||
169           !(element_it + 1)->IsField() ||
170           !address_data.IsFieldEmpty((element_it + 1)->GetField())) &&
171          // (2) Not following a removed field.
172          (element_it == format.begin() ||
173           !(element_it - 1)->IsField() ||
174           (!pruned_format.empty() && pruned_format.back().IsField())))) {
175       pruned_format.push_back(*element_it);
176     }
177   }
178 
179   std::string line;
180   for (std::vector<FormatElement>::const_iterator
181        element_it = pruned_format.begin();
182        element_it != pruned_format.end();
183        ++element_it) {
184     if (element_it->IsNewline()) {
185       if (!line.empty()) {
186         lines->push_back(line);
187         line.clear();
188       }
189     } else if (element_it->IsField()) {
190       AddressField field = element_it->GetField();
191       if (field == STREET_ADDRESS) {
192         // The field "street address" represents the street address lines of an
193         // address, so there can be multiple values.
194         if (!address_data.IsFieldEmpty(field)) {
195           line.append(address_data.address_line.front());
196           if (address_data.address_line.size() > 1U) {
197             lines->push_back(line);
198             line.clear();
199             lines->insert(lines->end(),
200                           address_data.address_line.begin() + 1,
201                           address_data.address_line.end());
202           }
203         }
204       } else {
205         line.append(address_data.GetFieldValue(field));
206       }
207     } else {
208       line.append(element_it->GetLiteral());
209     }
210   }
211   if (!line.empty()) {
212     lines->push_back(line);
213   }
214 }
215 
GetFormattedNationalAddressLine(const AddressData & address_data,std::string * line)216 void GetFormattedNationalAddressLine(
217     const AddressData& address_data, std::string* line) {
218   std::vector<std::string> address_lines;
219   GetFormattedNationalAddress(address_data, &address_lines);
220   CombineLinesForLanguage(address_lines, address_data.language_code, line);
221 }
222 
GetStreetAddressLinesAsSingleLine(const AddressData & address_data,std::string * line)223 void GetStreetAddressLinesAsSingleLine(
224     const AddressData& address_data, std::string* line) {
225   CombineLinesForLanguage(
226       address_data.address_line, address_data.language_code, line);
227 }
228 
229 }  // namespace addressinput
230 }  // namespace i18n
231